aboutsummaryrefslogtreecommitdiff
path: root/arch/riscv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv')
-rw-r--r--arch/riscv/Kbuild7
-rw-r--r--arch/riscv/Kconfig399
-rw-r--r--arch/riscv/Kconfig.erratas58
-rw-r--r--arch/riscv/Kconfig.socs58
-rw-r--r--arch/riscv/Makefile85
-rw-r--r--arch/riscv/boot/.gitignore4
-rw-r--r--arch/riscv/boot/Makefile25
-rw-r--r--arch/riscv/boot/dts/Makefile4
-rw-r--r--arch/riscv/boot/dts/canaan/Makefile3
-rw-r--r--arch/riscv/boot/dts/canaan/canaan_kd233.dts152
-rw-r--r--arch/riscv/boot/dts/canaan/k210.dtsi461
-rw-r--r--arch/riscv/boot/dts/canaan/k210_generic.dts46
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts211
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts213
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_go.dts221
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maixduino.dts186
-rw-r--r--arch/riscv/boot/dts/kendryte/Makefile4
-rw-r--r--arch/riscv/boot/dts/kendryte/k210.dts23
-rw-r--r--arch/riscv/boot/dts/kendryte/k210.dtsi123
-rw-r--r--arch/riscv/boot/dts/microchip/Makefile4
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi39
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts162
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi16
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-polarberry.dts99
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs.dtsi463
-rw-r--r--arch/riscv/boot/dts/sifive/Makefile4
-rw-r--r--arch/riscv/boot/dts/sifive/fu540-c000.dtsi69
-rw-r--r--arch/riscv/boot/dts/sifive/fu740-c000.dtsi326
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts12
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts246
-rw-r--r--arch/riscv/boot/dts/starfive/Makefile2
-rw-r--r--arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts164
-rw-r--r--arch/riscv/boot/dts/starfive/jh7100.dtsi230
-rwxr-xr-x[-rw-r--r--]arch/riscv/boot/install.sh21
-rw-r--r--arch/riscv/boot/loader.lds.S3
-rw-r--r--arch/riscv/configs/32-bit.config2
-rw-r--r--arch/riscv/configs/64-bit.config2
-rw-r--r--arch/riscv/configs/defconfig35
-rw-r--r--arch/riscv/configs/nommu_k210_defconfig45
-rw-r--r--arch/riscv/configs/nommu_k210_sdcard_defconfig87
-rw-r--r--arch/riscv/configs/nommu_virt_defconfig12
-rw-r--r--arch/riscv/configs/rv32_defconfig23
-rw-r--r--arch/riscv/errata/Makefile2
-rw-r--r--arch/riscv/errata/sifive/Makefile2
-rw-r--r--arch/riscv/errata/sifive/errata.c116
-rw-r--r--arch/riscv/errata/sifive/errata_cip_453.S38
-rw-r--r--arch/riscv/errata/thead/Makefile11
-rw-r--r--arch/riscv/errata/thead/errata.c82
-rw-r--r--arch/riscv/include/asm/Kbuild8
-rw-r--r--arch/riscv/include/asm/alternative-macros.h205
-rw-r--r--arch/riscv/include/asm/alternative.h62
-rw-r--r--arch/riscv/include/asm/asm-extable.h65
-rw-r--r--arch/riscv/include/asm/asm-prototypes.h19
-rw-r--r--arch/riscv/include/asm/asm.h1
-rw-r--r--arch/riscv/include/asm/atomic.h224
-rw-r--r--arch/riscv/include/asm/barrier.h10
-rw-r--r--arch/riscv/include/asm/bitops.h1
-rw-r--r--arch/riscv/include/asm/bug.h5
-rw-r--r--arch/riscv/include/asm/cacheinfo.h5
-rw-r--r--arch/riscv/include/asm/clint.h57
-rw-r--r--arch/riscv/include/asm/cmpxchg.h50
-rw-r--r--arch/riscv/include/asm/compat.h129
-rw-r--r--arch/riscv/include/asm/cpu_ops.h2
-rw-r--r--arch/riscv/include/asm/cpu_ops_sbi.h25
-rw-r--r--arch/riscv/include/asm/cpuidle.h24
-rw-r--r--arch/riscv/include/asm/csr.h174
-rw-r--r--arch/riscv/include/asm/current.h2
-rw-r--r--arch/riscv/include/asm/efi.h45
-rw-r--r--arch/riscv/include/asm/elf.h72
-rw-r--r--arch/riscv/include/asm/errata_list.h98
-rw-r--r--arch/riscv/include/asm/extable.h48
-rw-r--r--arch/riscv/include/asm/fixmap.h20
-rw-r--r--arch/riscv/include/asm/ftrace.h21
-rw-r--r--arch/riscv/include/asm/futex.h30
-rw-r--r--arch/riscv/include/asm/gdb_xml.h3
-rw-r--r--arch/riscv/include/asm/gpr-num.h77
-rw-r--r--arch/riscv/include/asm/hwcap.h27
-rw-r--r--arch/riscv/include/asm/io.h14
-rw-r--r--arch/riscv/include/asm/irq.h2
-rw-r--r--arch/riscv/include/asm/irq_work.h10
-rw-r--r--arch/riscv/include/asm/jump_label.h60
-rw-r--r--arch/riscv/include/asm/kasan.h32
-rw-r--r--arch/riscv/include/asm/kexec.h67
-rw-r--r--arch/riscv/include/asm/kfence.h63
-rw-r--r--arch/riscv/include/asm/kgdb.h5
-rw-r--r--arch/riscv/include/asm/kprobes.h33
-rw-r--r--arch/riscv/include/asm/kvm_host.h330
-rw-r--r--arch/riscv/include/asm/kvm_types.h7
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_fp.h59
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_sbi.h36
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_timer.h44
-rw-r--r--arch/riscv/include/asm/mmio.h6
-rw-r--r--arch/riscv/include/asm/mmu.h5
-rw-r--r--arch/riscv/include/asm/mmu_context.h30
-rw-r--r--arch/riscv/include/asm/mmzone.h13
-rw-r--r--arch/riscv/include/asm/module.lds.h (renamed from arch/riscv/kernel/module.lds)9
-rw-r--r--arch/riscv/include/asm/numa.h8
-rw-r--r--arch/riscv/include/asm/page.h91
-rw-r--r--arch/riscv/include/asm/pci.h16
-rw-r--r--arch/riscv/include/asm/perf_event.h72
-rw-r--r--arch/riscv/include/asm/pgalloc.h109
-rw-r--r--arch/riscv/include/asm/pgtable-32.h19
-rw-r--r--arch/riscv/include/asm/pgtable-64.h303
-rw-r--r--arch/riscv/include/asm/pgtable-bits.h17
-rw-r--r--arch/riscv/include/asm/pgtable.h468
-rw-r--r--arch/riscv/include/asm/probes.h24
-rw-r--r--arch/riscv/include/asm/processor.h18
-rw-r--r--arch/riscv/include/asm/ptrace.h71
-rw-r--r--arch/riscv/include/asm/sbi.h189
-rw-r--r--arch/riscv/include/asm/seccomp.h10
-rw-r--r--arch/riscv/include/asm/sections.h33
-rw-r--r--arch/riscv/include/asm/set_memory.h25
-rw-r--r--arch/riscv/include/asm/signal32.h18
-rw-r--r--arch/riscv/include/asm/smp.h26
-rw-r--r--arch/riscv/include/asm/soc.h40
-rw-r--r--arch/riscv/include/asm/sparsemem.h6
-rw-r--r--arch/riscv/include/asm/spinlock.h135
-rw-r--r--arch/riscv/include/asm/spinlock_types.h25
-rw-r--r--arch/riscv/include/asm/stackprotector.h30
-rw-r--r--arch/riscv/include/asm/stacktrace.h19
-rw-r--r--arch/riscv/include/asm/string.h11
-rw-r--r--arch/riscv/include/asm/suspend.h36
-rw-r--r--arch/riscv/include/asm/switch_to.h11
-rw-r--r--arch/riscv/include/asm/syscall.h13
-rw-r--r--arch/riscv/include/asm/thread_info.h41
-rw-r--r--arch/riscv/include/asm/timex.h64
-rw-r--r--arch/riscv/include/asm/tlbflush.h8
-rw-r--r--arch/riscv/include/asm/uaccess.h375
-rw-r--r--arch/riscv/include/asm/unistd.h11
-rw-r--r--arch/riscv/include/asm/uprobes.h40
-rw-r--r--arch/riscv/include/asm/vdso.h39
-rw-r--r--arch/riscv/include/asm/vdso/gettimeofday.h11
-rw-r--r--arch/riscv/include/asm/vdso/processor.h2
-rw-r--r--arch/riscv/include/asm/vendorid_list.h11
-rw-r--r--arch/riscv/include/asm/xip_fixup.h31
-rw-r--r--arch/riscv/include/uapi/asm/auxvec.h24
-rw-r--r--arch/riscv/include/uapi/asm/hwcap.h2
-rw-r--r--arch/riscv/include/uapi/asm/kvm.h148
-rw-r--r--arch/riscv/include/uapi/asm/unistd.h8
-rw-r--r--arch/riscv/kernel/Makefile46
-rw-r--r--arch/riscv/kernel/alternative.c118
-rw-r--r--arch/riscv/kernel/asm-offsets.c173
-rw-r--r--arch/riscv/kernel/cacheinfo.c114
-rw-r--r--arch/riscv/kernel/clint.c44
-rw-r--r--arch/riscv/kernel/compat_signal.c243
-rw-r--r--arch/riscv/kernel/compat_syscall_table.c19
-rw-r--r--arch/riscv/kernel/compat_vdso/.gitignore2
-rw-r--r--arch/riscv/kernel/compat_vdso/Makefile78
-rw-r--r--arch/riscv/kernel/compat_vdso/compat_vdso.S8
-rw-r--r--arch/riscv/kernel/compat_vdso/compat_vdso.lds.S3
-rw-r--r--arch/riscv/kernel/compat_vdso/flush_icache.S3
-rwxr-xr-xarch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh5
-rw-r--r--arch/riscv/kernel/compat_vdso/getcpu.S3
-rw-r--r--arch/riscv/kernel/compat_vdso/note.S3
-rw-r--r--arch/riscv/kernel/compat_vdso/rt_sigreturn.S3
-rw-r--r--arch/riscv/kernel/cpu-hotplug.c10
-rw-r--r--arch/riscv/kernel/cpu.c99
-rw-r--r--arch/riscv/kernel/cpu_ops.c26
-rw-r--r--arch/riscv/kernel/cpu_ops_sbi.c28
-rw-r--r--arch/riscv/kernel/cpu_ops_spinwait.c27
-rw-r--r--arch/riscv/kernel/cpufeature.c212
-rw-r--r--arch/riscv/kernel/crash_dump.c28
-rw-r--r--arch/riscv/kernel/crash_save_regs.S56
-rw-r--r--arch/riscv/kernel/efi-header.S111
-rw-r--r--arch/riscv/kernel/efi.c96
-rw-r--r--arch/riscv/kernel/elf_kexec.c448
-rw-r--r--arch/riscv/kernel/entry.S218
-rw-r--r--arch/riscv/kernel/ftrace.c128
-rw-r--r--arch/riscv/kernel/head.S139
-rw-r--r--arch/riscv/kernel/head.h11
-rw-r--r--arch/riscv/kernel/image-vars.h51
-rw-r--r--arch/riscv/kernel/jump_label.c53
-rw-r--r--arch/riscv/kernel/kexec_relocate.S221
-rw-r--r--arch/riscv/kernel/kgdb.c18
-rw-r--r--arch/riscv/kernel/machine_kexec.c197
-rw-r--r--arch/riscv/kernel/machine_kexec_file.c14
-rw-r--r--arch/riscv/kernel/mcount-dyn.S342
-rw-r--r--arch/riscv/kernel/mcount.S10
-rw-r--r--arch/riscv/kernel/module.c78
-rw-r--r--arch/riscv/kernel/patch.c14
-rw-r--r--arch/riscv/kernel/perf_callchain.c26
-rw-r--r--arch/riscv/kernel/perf_event.c485
-rw-r--r--arch/riscv/kernel/perf_regs.c3
-rw-r--r--arch/riscv/kernel/probes/Makefile6
-rw-r--r--arch/riscv/kernel/probes/decode-insn.c47
-rw-r--r--arch/riscv/kernel/probes/decode-insn.h18
-rw-r--r--arch/riscv/kernel/probes/ftrace.c62
-rw-r--r--arch/riscv/kernel/probes/kprobes.c369
-rw-r--r--arch/riscv/kernel/probes/kprobes_trampoline.S93
-rw-r--r--arch/riscv/kernel/probes/simulate-insn.c197
-rw-r--r--arch/riscv/kernel/probes/simulate-insn.h47
-rw-r--r--arch/riscv/kernel/probes/uprobes.c186
-rw-r--r--arch/riscv/kernel/process.c80
-rw-r--r--arch/riscv/kernel/ptrace.c224
-rw-r--r--arch/riscv/kernel/reset.c8
-rw-r--r--arch/riscv/kernel/riscv_ksyms.c2
-rw-r--r--arch/riscv/kernel/sbi.c317
-rw-r--r--arch/riscv/kernel/setup.c260
-rw-r--r--arch/riscv/kernel/signal.c28
-rw-r--r--arch/riscv/kernel/smp.c92
-rw-r--r--arch/riscv/kernel/smpboot.c31
-rw-r--r--arch/riscv/kernel/soc.c27
-rw-r--r--arch/riscv/kernel/stacktrace.c102
-rw-r--r--arch/riscv/kernel/suspend.c87
-rw-r--r--arch/riscv/kernel/suspend_entry.S125
-rw-r--r--arch/riscv/kernel/sys_riscv.c12
-rw-r--r--arch/riscv/kernel/syscall_table.c3
-rw-r--r--arch/riscv/kernel/time.c6
-rw-r--r--arch/riscv/kernel/trace_irq.c27
-rw-r--r--arch/riscv/kernel/trace_irq.h11
-rw-r--r--arch/riscv/kernel/traps.c85
-rw-r--r--arch/riscv/kernel/vdso.c303
-rw-r--r--arch/riscv/kernel/vdso/.gitignore1
-rw-r--r--arch/riscv/kernel/vdso/Makefile49
-rwxr-xr-xarch/riscv/kernel/vdso/gen_vdso_offsets.sh5
-rw-r--r--arch/riscv/kernel/vdso/vdso.S6
-rw-r--r--arch/riscv/kernel/vdso/vdso.lds.S6
-rw-r--r--arch/riscv/kernel/vdso/vgettimeofday.c6
-rw-r--r--arch/riscv/kernel/vmlinux-xip.lds.S147
-rw-r--r--arch/riscv/kernel/vmlinux.lds.S99
-rw-r--r--arch/riscv/kvm/Kconfig35
-rw-r--r--arch/riscv/kvm/Makefile26
-rw-r--r--arch/riscv/kvm/main.c129
-rw-r--r--arch/riscv/kvm/mmu.c786
-rw-r--r--arch/riscv/kvm/tlb.c461
-rw-r--r--arch/riscv/kvm/vcpu.c994
-rw-r--r--arch/riscv/kvm/vcpu_exit.c711
-rw-r--r--arch/riscv/kvm/vcpu_fp.c168
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c190
-rw-r--r--arch/riscv/kvm/vcpu_sbi_base.c100
-rw-r--r--arch/riscv/kvm/vcpu_sbi_hsm.c119
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c174
-rw-r--r--arch/riscv/kvm/vcpu_sbi_v01.c118
-rw-r--r--arch/riscv/kvm/vcpu_switch.S408
-rw-r--r--arch/riscv/kvm/vcpu_timer.c225
-rw-r--r--arch/riscv/kvm/vm.c92
-rw-r--r--arch/riscv/kvm/vmid.c124
-rw-r--r--arch/riscv/lib/Makefile5
-rw-r--r--arch/riscv/lib/delay.c4
-rw-r--r--arch/riscv/lib/error-inject.c10
-rw-r--r--arch/riscv/lib/memmove.S316
-rw-r--r--arch/riscv/lib/uaccess.S206
-rw-r--r--arch/riscv/mm/Makefile8
-rw-r--r--arch/riscv/mm/cacheflush.c7
-rw-r--r--arch/riscv/mm/context.c273
-rw-r--r--arch/riscv/mm/extable.c61
-rw-r--r--arch/riscv/mm/fault.c420
-rw-r--r--arch/riscv/mm/init.c1126
-rw-r--r--arch/riscv/mm/kasan_init.c482
-rw-r--r--arch/riscv/mm/pageattr.c54
-rw-r--r--arch/riscv/mm/physaddr.c6
-rw-r--r--arch/riscv/mm/ptdump.c121
-rw-r--r--arch/riscv/mm/tlbflush.c66
-rw-r--r--arch/riscv/net/bpf_jit.h551
-rw-r--r--arch/riscv/net/bpf_jit_comp32.c48
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c587
-rw-r--r--arch/riscv/net/bpf_jit_core.c45
-rw-r--r--arch/riscv/purgatory/.gitignore4
-rw-r--r--arch/riscv/purgatory/Makefile95
-rw-r--r--arch/riscv/purgatory/entry.S47
-rw-r--r--arch/riscv/purgatory/purgatory.c45
261 files changed, 22651 insertions, 3576 deletions
diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild
index 4614c01ba5b3..afa83e307a2e 100644
--- a/arch/riscv/Kbuild
+++ b/arch/riscv/Kbuild
@@ -2,3 +2,10 @@
obj-y += kernel/ mm/ net/
obj-$(CONFIG_BUILTIN_DTB) += boot/dts/
+obj-y += errata/
+obj-$(CONFIG_KVM) += kvm/
+
+obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/
+
+# for cleaning
+subdir- += boot
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 128192e14ff2..32ffef9f6e5b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -13,56 +13,103 @@ config 32BIT
config RISCV
def_bool y
select ARCH_CLOCKSOURCE_INIT
+ select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
+ select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_HAS_BINFMT_FLAT
+ select ARCH_HAS_CURRENT_STACK_POINTER
+ select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_WX
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_HAS_KCOV
select ARCH_HAS_MMIOWB
select ARCH_HAS_PTE_SPECIAL
- select ARCH_HAS_SET_DIRECT_MAP
- select ARCH_HAS_SET_MEMORY
- select ARCH_HAS_STRICT_KERNEL_RWX if MMU
+ select ARCH_HAS_SET_DIRECT_MAP if MMU
+ select ARCH_HAS_SET_MEMORY if MMU
+ select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
+ select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
+ select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+ select ARCH_STACKWALK
+ select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
+ select ARCH_SUPPORTS_HUGETLBFS if MMU
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select ARCH_USE_MEMTEST
+ select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
+ select ARCH_WANT_GENERAL_HUGETLB
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
+ select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
+ select BUILDTIME_TABLE_SORT if MMU
select CLONE_BACKWARDS
+ select CLINT_TIMER if !MMU
select COMMON_CLK
+ select CPU_PM if CPU_IDLE
select EDAC_SUPPORT
select GENERIC_ARCH_TOPOLOGY if SMP
select GENERIC_ATOMIC64 if !64BIT
- select GENERIC_CLOCKEVENTS
+ select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select GENERIC_EARLY_IOREMAP
select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
- select GENERIC_IOREMAP
+ select GENERIC_IDLE_POLL_SETUP
+ select GENERIC_IOREMAP if MMU
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_IRQ_SHOW
+ select GENERIC_IRQ_SHOW_LEVEL
+ select GENERIC_LIB_DEVMEM_IS_ALLOWED
select GENERIC_PCI_IOMAP
select GENERIC_PTDUMP if MMU
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
- select GENERIC_STRNCPY_FROM_USER if MMU
- select GENERIC_STRNLEN_USER if MMU
select GENERIC_TIME_VSYSCALL if MMU && 64BIT
- select HANDLE_DOMAIN_IRQ
+ select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
+ select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
select HAVE_ARCH_KASAN if MMU && 64BIT
- select HAVE_ARCH_KGDB
+ select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
+ select HAVE_ARCH_KFENCE if MMU && 64BIT
+ select HAVE_ARCH_KGDB if !XIP_KERNEL
select HAVE_ARCH_KGDB_QXFER_PKT
select HAVE_ARCH_MMAP_RND_BITS if MMU
+ select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_THREAD_STRUCT_WHITELIST
+ select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
select HAVE_ASM_MODVERSIONS
- select HAVE_COPY_THREAD_TLS
+ select HAVE_CONTEXT_TRACKING
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_EBPF_JIT if MMU
- select HAVE_FUTEX_CMPXCHG if FUTEX
+ select HAVE_FUNCTION_ERROR_INJECTION
+ select HAVE_GCC_PLUGINS
select HAVE_GENERIC_VDSO if MMU && 64BIT
+ select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_KPROBES if !XIP_KERNEL
+ select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
+ select HAVE_KRETPROBES if !XIP_KERNEL
+ select HAVE_MOVE_PMD
+ select HAVE_MOVE_PUD
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_FUNCTION_ARG_ACCESS_API
+ select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_RSEQ
select IRQ_DOMAIN
+ select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA if MODULES
select MODULE_SECTIONS if MODULES
select OF
@@ -71,22 +118,30 @@ config RISCV
select PCI_DOMAINS_GENERIC if PCI
select PCI_MSI if PCI
select RISCV_INTC
- select RISCV_TIMER
- select SPARSEMEM_STATIC if 32BIT
+ select RISCV_TIMER if RISCV_SBI
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
+ select TRACE_IRQFLAGS_SUPPORT
+ select UACCESS_MEMCPY if !MMU
+ select ZONE_DMA32 if 64BIT
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
default 8
+config ARCH_MMAP_RND_COMPAT_BITS_MIN
+ default 8
+
# max bits determined by the following formula:
# VA_BITS - PAGE_SHIFT - 3
config ARCH_MMAP_RND_BITS_MAX
default 24 if 64BIT # SV39 based
default 17
+config ARCH_MMAP_RND_COMPAT_BITS_MAX
+ default 17
+
# set if we run in machine mode, cleared if we run in supervisor mode
config RISCV_M_MODE
bool
@@ -105,54 +160,36 @@ config MMU
Select if you want MMU-based virtualised addressing space
support by paged memory management. If unsure, say 'Y'.
-config ZONE_DMA32
- bool
- default y if 64BIT
-
-config VA_BITS
- int
- default 32 if 32BIT
- default 39 if 64BIT
-
-config PA_BITS
- int
- default 34 if 32BIT
- default 56 if 64BIT
-
config PAGE_OFFSET
hex
- default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
+ default 0xC0000000 if 32BIT
default 0x80000000 if 64BIT && !MMU
- default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
- default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
+ default 0xff60000000000000 if 64BIT
+
+config KASAN_SHADOW_OFFSET
+ hex
+ depends on KASAN_GENERIC
+ default 0xdfffffff00000000 if 64BIT
+ default 0xffffffff if 32BIT
config ARCH_FLATMEM_ENABLE
- def_bool y
+ def_bool !NUMA
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on MMU
- select SPARSEMEM_VMEMMAP_ENABLE
+ select SPARSEMEM_STATIC if 32BIT && SPARSEMEM
+ select SPARSEMEM_VMEMMAP_ENABLE if 64BIT
config ARCH_SELECT_MEMORY_MODEL
def_bool ARCH_SPARSEMEM_ENABLE
-config ARCH_WANT_GENERAL_HUGETLB
- def_bool y
-
-config ARCH_SUPPORTS_DEBUG_PAGEALLOC
- def_bool y
-
-config SYS_SUPPORTS_HUGETLBFS
- depends on MMU
+config ARCH_SUPPORTS_UPROBES
def_bool y
config STACKTRACE_SUPPORT
def_bool y
-config TRACE_IRQFLAGS_SUPPORT
- def_bool y
-
config GENERIC_BUG
def_bool y
depends on BUG
@@ -175,10 +212,14 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
- default 3 if 64BIT
+ default 5 if 64BIT
default 2
+config LOCKDEP_SUPPORT
+ def_bool y
+
source "arch/riscv/Kconfig.socs"
+source "arch/riscv/Kconfig.erratas"
menu "Platform type"
@@ -201,12 +242,12 @@ config ARCH_RV32I
config ARCH_RV64I
bool "RV64I"
select 64BIT
- select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000
- select HAVE_DYNAMIC_FTRACE if MMU
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
+ select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8)
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
- select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER
- select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_TRACER if !XIP_KERNEL
select SWIOTLB if MMU
endchoice
@@ -229,20 +270,6 @@ config MODULE_SECTIONS
bool
select HAVE_MOD_ARCH_SPECIFIC
-choice
- prompt "Maximum Physical Memory"
- default MAXPHYSMEM_2GB if 32BIT
- default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
- default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY
-
- config MAXPHYSMEM_2GB
- bool "2GiB"
- config MAXPHYSMEM_128GB
- depends on 64BIT && CMODEL_MEDANY
- bool "128GiB"
-endchoice
-
-
config SMP
bool "Symmetric Multi-Processing"
help
@@ -282,6 +309,45 @@ config TUNE_GENERIC
endchoice
+# Common NUMA Features
+config NUMA
+ bool "NUMA Memory Allocation and Scheduler Support"
+ depends on SMP && MMU
+ select GENERIC_ARCH_NUMA
+ select OF_NUMA
+ select ARCH_SUPPORTS_NUMA_BALANCING
+ select USE_PERCPU_NUMA_NODE_ID
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ help
+ Enable NUMA (Non-Uniform Memory Access) support.
+
+ The kernel will try to allocate memory used by a CPU on the
+ local memory of the CPU and add some more NUMA awareness to the kernel.
+
+config NODES_SHIFT
+ int "Maximum NUMA Nodes (as a power of 2)"
+ range 1 10
+ default "2"
+ depends on NUMA
+ help
+ Specify the maximum number of NUMA Nodes available on the target
+ system. Increases memory reserved to accommodate various tables.
+
+config RISCV_ALTERNATIVE
+ bool
+ depends on !XIP_KERNEL
+ help
+ This Kconfig allows the kernel to automatically patch the
+ errata required by the execution platform at run time. The
+ code patching is performed once in the boot stages. It means
+ that the overhead from this mechanism is just taken once.
+
+config RISCV_ALTERNATIVE_EARLY
+ bool
+ depends on RISCV_ALTERNATIVE
+ help
+ Allows early patching of the kernel for special errata
+
config RISCV_ISA_C
bool "Emit compressed instructions when building Linux"
default y
@@ -292,18 +358,23 @@ config RISCV_ISA_C
If you don't know what to do here, say Y.
-menu "supported PMU type"
- depends on PERF_EVENTS
-
-config RISCV_BASE_PMU
- bool "Base Performance Monitoring Unit"
- def_bool y
+config RISCV_ISA_SVPBMT
+ bool "SVPBMT extension support"
+ depends on 64BIT && MMU
+ select RISCV_ALTERNATIVE
+ default y
help
- A base PMU that serves as a reference implementation and has limited
- feature of perf. It can run on any RISC-V machines so serves as the
- fallback, but this option can also be disable to reduce kernel size.
+ Adds support to dynamically detect the presence of the SVPBMT
+ ISA-extension (Supervisor-mode: page-based memory types) and
+ enable its usage.
-endmenu
+ The memory type for a page contains a combination of attributes
+ that indicate the cacheability, idempotency, and ordering
+ properties for access to that page.
+
+ The SVPBMT extension is only available on 64Bit cpus.
+
+ If you don't know what to do here, say Y.
config FPU
bool "FPU support"
@@ -320,26 +391,89 @@ menu "Kernel features"
source "kernel/Kconfig.hz"
-config SECCOMP
- bool "Enable seccomp to safely compute untrusted bytecode"
- help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
config RISCV_SBI_V01
bool "SBI v0.1 support"
- default y
depends on RISCV_SBI
help
This config allows kernel to use SBI v0.1 APIs. This will be
deprecated in future once legacy M-mode software are no longer in use.
+
+config RISCV_BOOT_SPINWAIT
+ bool "Spinwait booting method"
+ depends on SMP
+ default y if RISCV_SBI_V01 || RISCV_M_MODE
+ help
+ This enables support for booting Linux via spinwait method. In the
+ spinwait method, all cores randomly jump to Linux. One of the cores
+ gets chosen via lottery and all other keep spinning on a percpu
+ variable. This method cannot support CPU hotplug and sparse hartid
+ scheme. It should be only enabled for M-mode Linux or platforms relying
+ on older firmware without SBI HSM extension. All other platforms should
+ rely on ordered booting via SBI HSM extension which gets chosen
+ dynamically at runtime if the firmware supports it.
+
+ Since spinwait is incompatible with sparse hart IDs, it requires
+ NR_CPUS be large enough to contain the physical hart ID of the first
+ hart to enter Linux.
+
+ If unsure what to do here, say N.
+
+config KEXEC
+ bool "Kexec system call"
+ select KEXEC_CORE
+ select HOTPLUG_CPU if SMP
+ depends on MMU
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is independent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similarity to the exec system call.
+
+config KEXEC_FILE
+ bool "kexec file based systmem call"
+ select KEXEC_CORE
+ select KEXEC_ELF
+ select HAVE_IMA_KEXEC if IMA
+ depends on 64BIT
+ help
+ This is new version of kexec system call. This system call is
+ file based and takes file descriptors as system call argument
+ for kernel and initramfs as opposed to list of segments as
+ accepted by previous system call.
+
+ If you don't know what to do here, say Y.
+
+config ARCH_HAS_KEXEC_PURGATORY
+ def_bool KEXEC_FILE
+ select BUILD_BIN2C
+ depends on CRYPTO=y
+ depends on CRYPTO_SHA256=y
+
+config CRASH_DUMP
+ bool "Build kdump crash kernel"
+ help
+ Generate crash dump after being started by kexec. This should
+ be normally only set in special crash dump kernels which are
+ loaded in the main kernel with kexec-tools into a specially
+ reserved region and then later executed after a crash by
+ kdump/kexec.
+
+ For more details see Documentation/admin-guide/kdump/kdump.rst
+
+config COMPAT
+ bool "Kernel support for 32-bit U-mode"
+ default 64BIT
+ depends on 64BIT && MMU
+ help
+ This option enables support for a 32-bit U-mode running under a 64-bit
+ kernel at S-mode. riscv32-specific components such as system calls,
+ the user helper functions (vdso), signal rt_frame functions and the
+ ptrace interface are handled appropriately by the kernel.
+
+ If you want to execute 32-bit userspace applications, say Y.
+
endmenu
menu "Boot options"
@@ -387,15 +521,104 @@ config CMDLINE_FORCE
endchoice
+config EFI_STUB
+ bool
+
+config EFI
+ bool "UEFI runtime support"
+ depends on OF && !XIP_KERNEL
+ select LIBFDT
+ select UCS2_STRING
+ select EFI_PARAMS_FROM_FDT
+ select EFI_STUB
+ select EFI_GENERIC_STUB
+ select EFI_RUNTIME_WRAPPERS
+ select RISCV_ISA_C
+ depends on MMU
+ default y
+ help
+ This option provides support for runtime services provided
+ by UEFI firmware (such as non-volatile variables, realtime
+ clock, and platform reset). A UEFI stub is also provided to
+ allow the kernel to be booted as an EFI application. This
+ is only useful on systems that have UEFI firmware.
+
+config CC_HAVE_STACKPROTECTOR_TLS
+ def_bool $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=tp -mstack-protector-guard-offset=0)
+
+config STACKPROTECTOR_PER_TASK
+ def_bool y
+ depends on !RANDSTRUCT
+ depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS
+
+config PHYS_RAM_BASE_FIXED
+ bool "Explicitly specified physical RAM address"
+ default n
+
+config PHYS_RAM_BASE
+ hex "Platform Physical RAM address"
+ depends on PHYS_RAM_BASE_FIXED
+ default "0x80000000"
+ help
+ This is the physical address of RAM in the system. It has to be
+ explicitly specified to run early relocations of read-write data
+ from flash to RAM.
+
+config XIP_KERNEL
+ bool "Kernel Execute-In-Place from ROM"
+ depends on MMU && SPARSEMEM
+ # This prevents XIP from being enabled by all{yes,mod}config, which
+ # fail to build since XIP doesn't support large kernels.
+ depends on !COMPILE_TEST
+ select PHYS_RAM_BASE_FIXED
+ help
+ Execute-In-Place allows the kernel to run from non-volatile storage
+ directly addressable by the CPU, such as NOR flash. This saves RAM
+ space since the text section of the kernel is not loaded from flash
+ to RAM. Read-write sections, such as the data section and stack,
+ are still copied to RAM. The XIP kernel is not compressed since
+ it has to run directly from flash, so it will take more space to
+ store it. The flash address used to link the kernel object files,
+ and for storing it, is configuration dependent. Therefore, if you
+ say Y here, you must know the proper physical address where to
+ store the kernel image depending on your own flash memory usage.
+
+ Also note that the make target becomes "make xipImage" rather than
+ "make zImage" or "make Image". The final kernel binary to put in
+ ROM memory will be arch/riscv/boot/xipImage.
+
+ SPARSEMEM is required because the kernel text and rodata that are
+ flash resident are not backed by memmap, then any attempt to get
+ a struct page on those regions will trigger a fault.
+
+ If unsure, say N.
+
+config XIP_PHYS_ADDR
+ hex "XIP Kernel Physical Location"
+ depends on XIP_KERNEL
+ default "0x21000000"
+ help
+ This is the physical address in your flash memory the kernel will
+ be linked for and stored to. This address is dependent on your
+ own flash usage.
+
endmenu
config BUILTIN_DTB
- def_bool n
- depends on RISCV_M_MODE
+ bool
depends on OF
+ default y if XIP_KERNEL
menu "Power management options"
source "kernel/power/Kconfig"
endmenu
+
+menu "CPU Power Management"
+
+source "drivers/cpuidle/Kconfig"
+
+endmenu
+
+source "arch/riscv/kvm/Kconfig"
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
new file mode 100644
index 000000000000..457ac72c9b36
--- /dev/null
+++ b/arch/riscv/Kconfig.erratas
@@ -0,0 +1,58 @@
+menu "CPU errata selection"
+
+config ERRATA_SIFIVE
+ bool "SiFive errata"
+ depends on !XIP_KERNEL
+ select RISCV_ALTERNATIVE
+ help
+ All SiFive errata Kconfig depend on this Kconfig. Disabling
+ this Kconfig will disable all SiFive errata. Please say "Y"
+ here if your platform uses SiFive CPU cores.
+
+ Otherwise, please say "N" here to avoid unnecessary overhead.
+
+config ERRATA_SIFIVE_CIP_453
+ bool "Apply SiFive errata CIP-453"
+ depends on ERRATA_SIFIVE && 64BIT
+ default y
+ help
+ This will apply the SiFive CIP-453 errata to add sign extension
+ to the $badaddr when exception type is instruction page fault
+ and instruction access fault.
+
+ If you don't know what to do here, say "Y".
+
+config ERRATA_SIFIVE_CIP_1200
+ bool "Apply SiFive errata CIP-1200"
+ depends on ERRATA_SIFIVE && 64BIT
+ default y
+ help
+ This will apply the SiFive CIP-1200 errata to repalce all
+ "sfence.vma addr" with "sfence.vma" to ensure that the addr
+ has been flushed from TLB.
+
+ If you don't know what to do here, say "Y".
+
+config ERRATA_THEAD
+ bool "T-HEAD errata"
+ depends on !XIP_KERNEL
+ select RISCV_ALTERNATIVE
+ help
+ All T-HEAD errata Kconfig depend on this Kconfig. Disabling
+ this Kconfig will disable all T-HEAD errata. Please say "Y"
+ here if your platform uses T-HEAD CPU cores.
+
+ Otherwise, please say "N" here to avoid unnecessary overhead.
+
+config ERRATA_THEAD_PBMT
+ bool "Apply T-Head memory type errata"
+ depends on ERRATA_THEAD && 64BIT
+ select RISCV_ALTERNATIVE_EARLY
+ default y
+ help
+ This will apply the memory type errata to handle the non-standard
+ memory type bits in page-table-entries on T-Head SoCs.
+
+ If you don't know what to do here, say "Y".
+
+endmenu
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 6c88148f1b9b..85670dc9fe95 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -1,49 +1,83 @@
menu "SoC selection"
+config SOC_MICROCHIP_POLARFIRE
+ bool "Microchip PolarFire SoCs"
+ select MCHP_CLK_MPFS
+ select SIFIVE_PLIC
+ help
+ This enables support for Microchip PolarFire SoC platforms.
+
config SOC_SIFIVE
bool "SiFive SoCs"
select SERIAL_SIFIVE if TTY
select SERIAL_SIFIVE_CONSOLE if TTY
select CLK_SIFIVE
- select CLK_SIFIVE_FU540_PRCI
+ select CLK_SIFIVE_PRCI
select SIFIVE_PLIC
+ select ERRATA_SIFIVE if !XIP_KERNEL
help
This enables support for SiFive SoC platform hardware.
+config SOC_STARFIVE
+ bool "StarFive SoCs"
+ select PINCTRL
+ select RESET_CONTROLLER
+ select SIFIVE_PLIC
+ help
+ This enables support for StarFive SoC platform hardware.
+
config SOC_VIRT
bool "QEMU Virt Machine"
+ select CLINT_TIMER if RISCV_M_MODE
select POWER_RESET
select POWER_RESET_SYSCON
select POWER_RESET_SYSCON_POWEROFF
select GOLDFISH
select RTC_DRV_GOLDFISH if RTC_CLASS
select SIFIVE_PLIC
+ select PM_GENERIC_DOMAINS if PM
+ select PM_GENERIC_DOMAINS_OF if PM && OF
+ select RISCV_SBI_CPUIDLE if CPU_IDLE && RISCV_SBI
help
This enables support for QEMU Virt Machine.
-config SOC_KENDRYTE
- bool "Kendryte K210 SoC"
+config SOC_CANAAN
+ bool "Canaan Kendryte K210 SoC"
depends on !MMU
+ select CLINT_TIMER if RISCV_M_MODE
select SERIAL_SIFIVE if TTY
select SERIAL_SIFIVE_CONSOLE if TTY
select SIFIVE_PLIC
+ select ARCH_HAS_RESET_CONTROLLER
+ select PINCTRL
+ select COMMON_CLK
+ select COMMON_CLK_K210
help
- This enables support for Kendryte K210 SoC platform hardware.
+ This enables support for Canaan Kendryte K210 SoC platform hardware.
-config SOC_KENDRYTE_K210_DTB
- def_bool y
- depends on SOC_KENDRYTE_K210_DTB_BUILTIN
+if SOC_CANAAN
-config SOC_KENDRYTE_K210_DTB_BUILTIN
- bool "Builtin device tree for the Kendryte K210"
- depends on SOC_KENDRYTE
+config SOC_CANAAN_K210_DTB_BUILTIN
+ bool "Builtin device tree for the Canaan Kendryte K210"
+ depends on SOC_CANAAN
default y
select OF
select BUILTIN_DTB
- select SOC_KENDRYTE_K210_DTB
help
- Builds a device tree for the Kendryte K210 into the Linux image.
+ Build a device tree for the Kendryte K210 into the Linux image.
This option should be selected if no bootloader is being used.
If unsure, say Y.
+config SOC_CANAAN_K210_DTB_SOURCE
+ string "Source file for the Canaan Kendryte K210 builtin DTB"
+ depends on SOC_CANAAN
+ depends on SOC_CANAAN_K210_DTB_BUILTIN
+ default "k210_generic"
+ help
+ Base name (without suffix, relative to arch/riscv/boot/dts/canaan)
+ for the DTS file that will be used to produce the DTB linked into the
+ kernel.
+
+endif
+
endmenu
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index fb6e37db836d..34cf8a598617 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -1,7 +1,5 @@
# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" and "archdep" for cleaning up and making dependencies for
-# this architecture
+# architecture-specific flags and dependencies.
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
@@ -12,9 +10,11 @@ OBJCOPYFLAGS := -O binary
LDFLAGS_vmlinux :=
ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
LDFLAGS_vmlinux := --no-relax
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=8
endif
-ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy)
+ifeq ($(CONFIG_CMODEL_MEDLOW),y)
KBUILD_CFLAGS_MODULE += -mcmodel=medany
endif
@@ -36,11 +36,26 @@ else
KBUILD_LDFLAGS += -melf32lriscv
endif
+ifeq ($(CONFIG_LD_IS_LLD),y)
+ KBUILD_CFLAGS += -mno-relax
+ KBUILD_AFLAGS += -mno-relax
+ifndef CONFIG_AS_IS_LLVM
+ KBUILD_CFLAGS += -Wa,-mno-relax
+ KBUILD_AFLAGS += -Wa,-mno-relax
+endif
+endif
+
# ISA string setting
riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
+
+# Newer binutils versions default to ISA spec version 20191213 which moves some
+# instructions from the I extension to the Zicsr and Zifencei extensions.
+toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei)
+riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei
+
KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
KBUILD_AFLAGS += -march=$(riscv-march-y)
@@ -53,9 +68,6 @@ endif
ifeq ($(CONFIG_CMODEL_MEDANY),y)
KBUILD_CFLAGS += -mcmodel=medany
endif
-ifeq ($(CONFIG_MODULE_SECTIONS),y)
- KBUILD_LDS_MODULE += $(srctree)/arch/riscv/kernel/module.lds
-endif
ifeq ($(CONFIG_PERF_EVENTS),y)
KBUILD_CFLAGS += -fno-omit-frame-pointer
endif
@@ -68,29 +80,57 @@ KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
# architectures. It's faster to have GCC emit only aligned accesses.
KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
+prepare: stack_protector_prepare
+stack_protector_prepare: prepare0
+ $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls \
+ -mstack-protector-guard-reg=tp \
+ -mstack-protector-guard-offset=$(shell \
+ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}' \
+ include/generated/asm-offsets.h))
+endif
+
# arch specific predefines for sparse
CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS)
# Default target when executing plain make
boot := arch/riscv/boot
+ifeq ($(CONFIG_XIP_KERNEL),y)
+KBUILD_IMAGE := $(boot)/xipImage
+else
KBUILD_IMAGE := $(boot)/Image.gz
+endif
head-y := arch/riscv/kernel/head.o
-core-y += arch/riscv/
-
libs-y += arch/riscv/lib/
+libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
+ $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
+ $(build)=arch/riscv/kernel/compat_vdso $@)
+
+ifeq ($(KBUILD_EXTMOD),)
+ifeq ($(CONFIG_MMU),y)
+prepare: vdso_prepare
+vdso_prepare: prepare0
+ $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h
+ $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
+ $(build)=arch/riscv/kernel/compat_vdso include/generated/compat_vdso-offsets.h)
+
+endif
+endif
-ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_KENDRYTE),yy)
+ifneq ($(CONFIG_XIP_KERNEL),y)
+ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
KBUILD_IMAGE := $(boot)/loader.bin
else
KBUILD_IMAGE := $(boot)/Image.gz
endif
-BOOT_TARGETS := Image Image.gz loader loader.bin
+endif
+BOOT_TARGETS := Image Image.gz loader loader.bin xipImage
all: $(notdir $(KBUILD_IMAGE))
@@ -98,5 +138,24 @@ $(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
@$(kecho) ' Kernel: $(boot)/$@ is ready'
-zinstall install:
- $(Q)$(MAKE) $(build)=$(boot) $@
+Image.%: Image
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+install: KBUILD_IMAGE := $(boot)/Image
+zinstall: KBUILD_IMAGE := $(boot)/Image.gz
+install zinstall:
+ $(call cmd,install)
+
+PHONY += rv32_randconfig
+rv32_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/32-bit.config \
+ -f $(srctree)/Makefile randconfig
+
+PHONY += rv64_randconfig
+rv64_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/64-bit.config \
+ -f $(srctree)/Makefile randconfig
+
+PHONY += rv32_defconfig
+rv32_defconfig:
+ $(Q)$(MAKE) -f $(srctree)/Makefile defconfig 32-bit.config
diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore
index 574c10f8ff68..0cea9f7fa9d5 100644
--- a/arch/riscv/boot/.gitignore
+++ b/arch/riscv/boot/.gitignore
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
Image
-Image.gz
+Image.*
loader
loader.lds
+loader.bin
+xipImage
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index 3530c59b3ea7..becd0621071c 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -14,9 +14,24 @@
# Based on the ia64 and arm64 boot/Makefile.
#
+KCOV_INSTRUMENT := n
+
OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
+OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
+
+targets := Image Image.* loader loader.o loader.lds loader.bin
+targets := Image Image.* loader loader.o loader.lds loader.bin xipImage
+
+ifeq ($(CONFIG_XIP_KERNEL),y)
+
+quiet_cmd_mkxip = $(quiet_cmd_objcopy)
+cmd_mkxip = $(cmd_objcopy)
-targets := Image loader
+$(obj)/xipImage: vmlinux FORCE
+ $(call if_changed,mkxip)
+ @$(kecho) ' Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
+
+endif
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
@@ -43,11 +58,3 @@ $(obj)/Image.lzo: $(obj)/Image FORCE
$(obj)/loader.bin: $(obj)/loader FORCE
$(call if_changed,objcopy)
-
-install:
- $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
- $(obj)/Image System.map "$(INSTALL_PATH)"
-
-zinstall:
- $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
- $(obj)/Image.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile
index ca1f8cbd78c0..ff174996cdfd 100644
--- a/arch/riscv/boot/dts/Makefile
+++ b/arch/riscv/boot/dts/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
subdir-y += sifive
-subdir-y += kendryte
+subdir-y += starfive
+subdir-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += canaan
+subdir-y += microchip
obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y))
diff --git a/arch/riscv/boot/dts/canaan/Makefile b/arch/riscv/boot/dts/canaan/Makefile
new file mode 100644
index 000000000000..c61b08ac8554
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .dtb, $(CONFIG_SOC_CANAAN_K210_DTB_SOURCE))
+obj-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
new file mode 100644
index 000000000000..039b92abf046
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+ model = "Kendryte KD233";
+ compatible = "canaan,kendryte-kd233", "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ led0 {
+ gpios = <&gpio0 8 GPIO_ACTIVE_LOW>;
+ };
+
+ led1 {
+ gpios = <&gpio0 9 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ key0 {
+ label = "KEY0";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
+ };
+ };
+};
+
+&fpioa {
+ pinctrl-0 = <&jtag_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ jtag_pinctrl: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(6, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(7, K210_PCF_SPI0_SCLK)>, /* wr */
+ <K210_FPIOA(8, K210_PCF_GPIOHS21)>; /* dc */
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(9, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(10, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(11, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(12, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(13, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(14, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(15, K210_PCF_DVP_PCLK)>,
+ <K210_FPIOA(17, K210_PCF_DVP_HSYNC)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+ <K210_FPIOA(20, K210_PCF_GPIOHS4)>, /* Rot. dip sw line 8 */
+ <K210_FPIOA(21, K210_PCF_GPIOHS5)>, /* Rot. dip sw line 4 */
+ <K210_FPIOA(22, K210_PCF_GPIOHS6)>, /* Rot. dip sw line 2 */
+ <K210_FPIOA(23, K210_PCF_GPIOHS7)>, /* Rot. dip sw line 1 */
+ <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+ <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+ <K210_FPIOA(26, K210_PCF_GPIOHS10)>;
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(29, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(30, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(31, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(32, K210_PCF_GPIOHS16)>; /* cs */
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(33, K210_PCF_I2S0_IN_D0)>,
+ <K210_FPIOA(34, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(35, K210_PCF_I2S0_SCLK)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "ilitek,ili9341";
+ reg = <0>;
+ dc-gpios = <&gpio0 21 GPIO_ACTIVE_HIGH>;
+ spi-max-frequency = <15000000>;
+ status = "disabled";
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 16 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
new file mode 100644
index 000000000000..44d338514761
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#include <dt-bindings/clock/k210-clk.h>
+#include <dt-bindings/pinctrl/k210-fpioa.h>
+#include <dt-bindings/reset/k210-rst.h>
+
+/ {
+ /*
+ * Although the K210 is a 64-bit CPU, the address bus is only 32-bits
+ * wide, and the upper half of all addresses is ignored.
+ */
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "canaan,kendryte-k210";
+
+ aliases {
+ serial0 = &uarths0;
+ serial1 = &uart1;
+ serial2 = &uart2;
+ serial3 = &uart3;
+ };
+
+ /*
+ * The K210 has an sv39 MMU following the privileged specification v1.9.
+ * Since this is a non-ratified draft specification, the kernel does not
+ * support it and the K210 support enabled only for the !MMU case.
+ * Be consistent with this by setting the CPUs MMU type to "none".
+ */
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ timebase-frequency = <7800000>;
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "canaan,k210", "riscv";
+ reg = <0>;
+ riscv,isa = "rv64imafdc";
+ mmu-type = "riscv,none";
+ i-cache-block-size = <64>;
+ i-cache-size = <0x8000>;
+ d-cache-block-size = <64>;
+ d-cache-size = <0x8000>;
+ cpu0_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "riscv,cpu-intc";
+ };
+ };
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "canaan,k210", "riscv";
+ reg = <1>;
+ riscv,isa = "rv64imafdc";
+ mmu-type = "riscv,none";
+ i-cache-block-size = <64>;
+ i-cache-size = <0x8000>;
+ d-cache-block-size = <64>;
+ d-cache-size = <0x8000>;
+ cpu1_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "riscv,cpu-intc";
+ };
+ };
+ };
+
+ sram: memory@80000000 {
+ device_type = "memory";
+ compatible = "canaan,k210-sram";
+ reg = <0x80000000 0x400000>,
+ <0x80400000 0x200000>,
+ <0x80600000 0x200000>;
+ reg-names = "sram0", "sram1", "aisram";
+ clocks = <&sysclk K210_CLK_SRAM0>,
+ <&sysclk K210_CLK_SRAM1>,
+ <&sysclk K210_CLK_AI>;
+ clock-names = "sram0", "sram1", "aisram";
+ };
+
+ clocks {
+ in0: oscillator {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <26000000>;
+ };
+ };
+
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-bus";
+ ranges;
+ interrupt-parent = <&plic0>;
+
+ rom0: nvmem@1000 {
+ reg = <0x1000 0x1000>;
+ read-only;
+ };
+
+ clint0: timer@2000000 {
+ compatible = "canaan,k210-clint", "sifive,clint0";
+ reg = <0x2000000 0xC000>;
+ interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
+ <&cpu1_intc 3>, <&cpu1_intc 7>;
+ };
+
+ plic0: interrupt-controller@c000000 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ compatible = "canaan,k210-plic", "sifive,plic-1.0.0";
+ reg = <0xC000000 0x4000000>;
+ interrupt-controller;
+ interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>,
+ <&cpu1_intc 11>, <&cpu1_intc 9>;
+ riscv,ndev = <65>;
+ };
+
+ uarths0: serial@38000000 {
+ compatible = "canaan,k210-uarths", "sifive,uart0";
+ reg = <0x38000000 0x1000>;
+ interrupts = <33>;
+ clocks = <&sysclk K210_CLK_CPU>;
+ };
+
+ gpio0: gpio-controller@38001000 {
+ #interrupt-cells = <2>;
+ #gpio-cells = <2>;
+ compatible = "canaan,k210-gpiohs", "sifive,gpio0";
+ reg = <0x38001000 0x1000>;
+ interrupt-controller;
+ interrupts = <34>, <35>, <36>, <37>, <38>, <39>, <40>,
+ <41>, <42>, <43>, <44>, <45>, <46>, <47>,
+ <48>, <49>, <50>, <51>, <52>, <53>, <54>,
+ <55>, <56>, <57>, <58>, <59>, <60>, <61>,
+ <62>, <63>, <64>, <65>;
+ gpio-controller;
+ ngpios = <32>;
+ };
+
+ dmac0: dma-controller@50000000 {
+ compatible = "snps,axi-dma-1.01a";
+ reg = <0x50000000 0x1000>;
+ interrupts = <27>, <28>, <29>, <30>, <31>, <32>;
+ #dma-cells = <1>;
+ clocks = <&sysclk K210_CLK_DMA>, <&sysclk K210_CLK_DMA>;
+ clock-names = "core-clk", "cfgr-clk";
+ resets = <&sysrst K210_RST_DMA>;
+ dma-channels = <6>;
+ snps,dma-masters = <2>;
+ snps,priority = <0 1 2 3 4 5>;
+ snps,data-width = <5>;
+ snps,block-size = <0x200000 0x200000 0x200000
+ 0x200000 0x200000 0x200000>;
+ snps,axi-max-burst-len = <256>;
+ };
+
+ apb0: bus@50200000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-pm-bus";
+ ranges;
+ clocks = <&sysclk K210_CLK_APB0>;
+
+ gpio1: gpio@50200000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dw-apb-gpio";
+ reg = <0x50200000 0x80>;
+ clocks = <&sysclk K210_CLK_APB0>,
+ <&sysclk K210_CLK_GPIO>;
+ clock-names = "bus", "db";
+ resets = <&sysrst K210_RST_GPIO>;
+
+ gpio1_0: gpio-port@0 {
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
+ compatible = "snps,dw-apb-gpio-port";
+ reg = <0>;
+ interrupt-controller;
+ interrupts = <23>;
+ gpio-controller;
+ ngpios = <8>;
+ };
+ };
+
+ uart1: serial@50210000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x50210000 0x100>;
+ interrupts = <11>;
+ clocks = <&sysclk K210_CLK_UART1>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&sysrst K210_RST_UART1>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ dcd-override;
+ dsr-override;
+ cts-override;
+ ri-override;
+ };
+
+ uart2: serial@50220000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x50220000 0x100>;
+ interrupts = <12>;
+ clocks = <&sysclk K210_CLK_UART2>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&sysrst K210_RST_UART2>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ dcd-override;
+ dsr-override;
+ cts-override;
+ ri-override;
+ };
+
+ uart3: serial@50230000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x50230000 0x100>;
+ interrupts = <13>;
+ clocks = <&sysclk K210_CLK_UART3>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&sysrst K210_RST_UART3>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ dcd-override;
+ dsr-override;
+ cts-override;
+ ri-override;
+ };
+
+ spi2: spi@50240000 {
+ compatible = "canaan,k210-spi";
+ spi-slave;
+ reg = <0x50240000 0x100>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ interrupts = <3>;
+ clocks = <&sysclk K210_CLK_SPI2>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ssi_clk", "pclk";
+ resets = <&sysrst K210_RST_SPI2>;
+ spi-max-frequency = <25000000>;
+ };
+
+ i2s0: i2s@50250000 {
+ compatible = "snps,designware-i2s";
+ reg = <0x50250000 0x200>;
+ interrupts = <5>;
+ clocks = <&sysclk K210_CLK_I2S0>;
+ clock-names = "i2sclk";
+ resets = <&sysrst K210_RST_I2S0>;
+ };
+
+ i2s1: i2s@50260000 {
+ compatible = "snps,designware-i2s";
+ reg = <0x50260000 0x200>;
+ interrupts = <6>;
+ clocks = <&sysclk K210_CLK_I2S1>;
+ clock-names = "i2sclk";
+ resets = <&sysrst K210_RST_I2S1>;
+ };
+
+ i2s2: i2s@50270000 {
+ compatible = "snps,designware-i2s";
+ reg = <0x50270000 0x200>;
+ interrupts = <7>;
+ clocks = <&sysclk K210_CLK_I2S2>;
+ clock-names = "i2sclk";
+ resets = <&sysrst K210_RST_I2S2>;
+ };
+
+ i2c0: i2c@50280000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x50280000 0x100>;
+ interrupts = <8>;
+ clocks = <&sysclk K210_CLK_I2C0>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ref", "pclk";
+ resets = <&sysrst K210_RST_I2C0>;
+ };
+
+ i2c1: i2c@50290000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x50290000 0x100>;
+ interrupts = <9>;
+ clocks = <&sysclk K210_CLK_I2C1>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ref", "pclk";
+ resets = <&sysrst K210_RST_I2C1>;
+ };
+
+ i2c2: i2c@502a0000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x502A0000 0x100>;
+ interrupts = <10>;
+ clocks = <&sysclk K210_CLK_I2C2>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ref", "pclk";
+ resets = <&sysrst K210_RST_I2C2>;
+ };
+
+ fpioa: pinmux@502b0000 {
+ compatible = "canaan,k210-fpioa";
+ reg = <0x502B0000 0x100>;
+ clocks = <&sysclk K210_CLK_FPIOA>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ref", "pclk";
+ resets = <&sysrst K210_RST_FPIOA>;
+ canaan,k210-sysctl-power = <&sysctl 108>;
+ };
+
+ timer0: timer@502d0000 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0x502D0000 0x100>;
+ interrupts = <14>, <15>;
+ clocks = <&sysclk K210_CLK_TIMER0>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "timer", "pclk";
+ resets = <&sysrst K210_RST_TIMER0>;
+ };
+
+ timer1: timer@502e0000 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0x502E0000 0x100>;
+ interrupts = <16>, <17>;
+ clocks = <&sysclk K210_CLK_TIMER1>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "timer", "pclk";
+ resets = <&sysrst K210_RST_TIMER1>;
+ };
+
+ timer2: timer@502f0000 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0x502F0000 0x100>;
+ interrupts = <18>, <19>;
+ clocks = <&sysclk K210_CLK_TIMER2>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "timer", "pclk";
+ resets = <&sysrst K210_RST_TIMER2>;
+ };
+ };
+
+ apb1: bus@50400000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-pm-bus";
+ ranges;
+ clocks = <&sysclk K210_CLK_APB1>;
+
+ wdt0: watchdog@50400000 {
+ compatible = "snps,dw-wdt";
+ reg = <0x50400000 0x100>;
+ interrupts = <21>;
+ clocks = <&sysclk K210_CLK_WDT0>,
+ <&sysclk K210_CLK_APB1>;
+ clock-names = "tclk", "pclk";
+ resets = <&sysrst K210_RST_WDT0>;
+ };
+
+ wdt1: watchdog@50410000 {
+ compatible = "snps,dw-wdt";
+ reg = <0x50410000 0x100>;
+ interrupts = <22>;
+ clocks = <&sysclk K210_CLK_WDT1>,
+ <&sysclk K210_CLK_APB1>;
+ clock-names = "tclk", "pclk";
+ resets = <&sysrst K210_RST_WDT1>;
+ };
+
+ sysctl: syscon@50440000 {
+ compatible = "canaan,k210-sysctl",
+ "syscon", "simple-mfd";
+ reg = <0x50440000 0x100>;
+ clocks = <&sysclk K210_CLK_APB1>;
+ clock-names = "pclk";
+
+ sysclk: clock-controller {
+ #clock-cells = <1>;
+ compatible = "canaan,k210-clk";
+ clocks = <&in0>;
+ };
+
+ sysrst: reset-controller {
+ compatible = "canaan,k210-rst";
+ #reset-cells = <1>;
+ };
+
+ reboot: syscon-reboot {
+ compatible = "syscon-reboot";
+ regmap = <&sysctl>;
+ offset = <48>;
+ mask = <1>;
+ value = <1>;
+ };
+ };
+ };
+
+ apb2: bus@52000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-pm-bus";
+ ranges;
+ clocks = <&sysclk K210_CLK_APB2>;
+
+ spi0: spi@52000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "canaan,k210-spi";
+ reg = <0x52000000 0x100>;
+ interrupts = <1>;
+ clocks = <&sysclk K210_CLK_SPI0>,
+ <&sysclk K210_CLK_APB2>;
+ clock-names = "ssi_clk", "pclk";
+ resets = <&sysrst K210_RST_SPI0>;
+ reset-names = "spi";
+ spi-max-frequency = <25000000>;
+ num-cs = <4>;
+ reg-io-width = <4>;
+ };
+
+ spi1: spi@53000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "canaan,k210-spi";
+ reg = <0x53000000 0x100>;
+ interrupts = <2>;
+ clocks = <&sysclk K210_CLK_SPI1>,
+ <&sysclk K210_CLK_APB2>;
+ clock-names = "ssi_clk", "pclk";
+ resets = <&sysrst K210_RST_SPI1>;
+ reset-names = "spi";
+ spi-max-frequency = <25000000>;
+ num-cs = <4>;
+ reg-io-width = <4>;
+ };
+
+ spi3: spi@54000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwc-ssi-1.01a";
+ reg = <0x54000000 0x200>;
+ interrupts = <4>;
+ clocks = <&sysclk K210_CLK_SPI3>,
+ <&sysclk K210_CLK_APB2>;
+ clock-names = "ssi_clk", "pclk";
+ resets = <&sysrst K210_RST_SPI3>;
+ reset-names = "spi";
+ /* Could possibly go up to 200 MHz */
+ spi-max-frequency = <100000000>;
+ num-cs = <4>;
+ reg-io-width = <4>;
+ };
+ };
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/k210_generic.dts b/arch/riscv/boot/dts/canaan/k210_generic.dts
new file mode 100644
index 000000000000..396c8ca4d24d
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/k210_generic.dts
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+ model = "Kendryte K210 generic";
+ compatible = "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+};
+
+&fpioa {
+ pinctrl-0 = <&jtag_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ jtag_pins: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pins: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
new file mode 100644
index 000000000000..b9e30df127fe
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ model = "SiPeed MAIX BiT";
+ compatible = "sipeed,maix-bit", "sipeed,maix-bitm",
+ "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ led0 {
+ color = <LED_COLOR_ID_GREEN>;
+ label = "green";
+ gpios = <&gpio1_0 4 GPIO_ACTIVE_LOW>;
+ };
+
+ led1 {
+ color = <LED_COLOR_ID_RED>;
+ label = "red";
+ gpios = <&gpio1_0 5 GPIO_ACTIVE_LOW>;
+ };
+
+ led2 {
+ color = <LED_COLOR_ID_BLUE>;
+ label = "blue";
+ gpios = <&gpio1_0 6 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ boot {
+ label = "BOOT";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+ };
+ };
+};
+
+&fpioa {
+ pinctrl-names = "default";
+ pinctrl-0 = <&jtag_pinctrl>;
+ status = "okay";
+
+ jtag_pinctrl: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+
+ gpio_pinctrl: gpio-pinmux {
+ pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+ <K210_FPIOA(9, K210_PCF_GPIO1)>,
+ <K210_FPIOA(10, K210_PCF_GPIO2)>,
+ <K210_FPIOA(11, K210_PCF_GPIO3)>,
+ <K210_FPIOA(12, K210_PCF_GPIO4)>,
+ <K210_FPIOA(13, K210_PCF_GPIO5)>,
+ <K210_FPIOA(14, K210_PCF_GPIO6)>,
+ <K210_FPIOA(15, K210_PCF_GPIO7)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+ <K210_FPIOA(17, K210_PCF_GPIOHS1)>,
+ <K210_FPIOA(21, K210_PCF_GPIOHS5)>,
+ <K210_FPIOA(22, K210_PCF_GPIOHS6)>,
+ <K210_FPIOA(23, K210_PCF_GPIOHS7)>,
+ <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+ <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+ <K210_FPIOA(32, K210_PCF_GPIOHS16)>,
+ <K210_FPIOA(33, K210_PCF_GPIOHS17)>,
+ <K210_FPIOA(34, K210_PCF_GPIOHS18)>,
+ <K210_FPIOA(35, K210_PCF_GPIOHS19)>;
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+ <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+ <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(37, K210_PCF_GPIOHS21)>, /* rst */
+ <K210_FPIOA(38, K210_PCF_GPIOHS22)>, /* dc */
+ <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(29, K210_PCF_GPIOHS13)>; /* cs */
+ };
+
+ i2c1_pinctrl: i2c1-pinmux {
+ pinmux = <K210_FPIOA(30, K210_PCF_I2C1_SCLK)>,
+ <K210_FPIOA(31, K210_PCF_I2C1_SDA)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio1 {
+ pinctrl-0 = <&gpio_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pinctrl>;
+ pinctrl-names = "default";
+ clock-frequency = <400000>;
+ status = "okay";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "sitronix,st7789v";
+ reg = <0>;
+ reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+ dc-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>;
+ spi-max-frequency = <15000000>;
+ spi-cs-high;
+ status = "disabled";
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
+
+&spi3 {
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ spi-tx-bus-width = <4>;
+ spi-rx-bus-width = <4>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
new file mode 100644
index 000000000000..8d23401b0bbb
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ model = "SiPeed MAIX Dock";
+ compatible = "sipeed,maix-dock-m1", "sipeed,maix-dock-m1w",
+ "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ /*
+ * Note: the board wiring drawing documents green on
+ * gpio #4, red on gpio #5 and blue on gpio #6. However,
+ * the board is actually wired differently as defined here.
+ */
+ led0 {
+ color = <LED_COLOR_ID_BLUE>;
+ label = "blue";
+ gpios = <&gpio1_0 4 GPIO_ACTIVE_LOW>;
+ };
+
+ led1 {
+ color = <LED_COLOR_ID_GREEN>;
+ label = "green";
+ gpios = <&gpio1_0 5 GPIO_ACTIVE_LOW>;
+ };
+
+ led2 {
+ color = <LED_COLOR_ID_RED>;
+ label = "red";
+ gpios = <&gpio1_0 6 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ boot {
+ label = "BOOT";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+ };
+ };
+};
+
+&fpioa {
+ pinctrl-0 = <&jtag_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ jtag_pinctrl: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+
+ gpio_pinctrl: gpio-pinmux {
+ pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+ <K210_FPIOA(11, K210_PCF_GPIO3)>,
+ <K210_FPIOA(12, K210_PCF_GPIO4)>,
+ <K210_FPIOA(13, K210_PCF_GPIO5)>,
+ <K210_FPIOA(14, K210_PCF_GPIO6)>,
+ <K210_FPIOA(15, K210_PCF_GPIO7)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+ <K210_FPIOA(17, K210_PCF_GPIOHS1)>,
+ <K210_FPIOA(21, K210_PCF_GPIOHS5)>,
+ <K210_FPIOA(22, K210_PCF_GPIOHS6)>,
+ <K210_FPIOA(23, K210_PCF_GPIOHS7)>,
+ <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+ <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+ <K210_FPIOA(32, K210_PCF_GPIOHS16)>,
+ <K210_FPIOA(33, K210_PCF_GPIOHS17)>,
+ <K210_FPIOA(34, K210_PCF_GPIOHS18)>,
+ <K210_FPIOA(35, K210_PCF_GPIOHS19)>;
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+ <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+ <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(37, K210_PCF_GPIOHS21)>, /* rst */
+ <K210_FPIOA(38, K210_PCF_GPIOHS22)>, /* dc */
+ <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(29, K210_PCF_GPIOHS13)>; /* cs */
+ };
+
+ i2c1_pinctrl: i2c1-pinmux {
+ pinmux = <K210_FPIOA(9, K210_PCF_I2C1_SCLK)>,
+ <K210_FPIOA(10, K210_PCF_I2C1_SDA)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio1 {
+ pinctrl-0 = <&gpio_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pinctrl>;
+ pinctrl-names = "default";
+ clock-frequency = <400000>;
+ status = "okay";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "sitronix,st7789v";
+ reg = <0>;
+ reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+ dc-gpios = <&gpio0 22 0>;
+ spi-max-frequency = <15000000>;
+ status = "disabled";
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
+
+&spi3 {
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ spi-tx-bus-width = <4>;
+ spi-rx-bus-width = <4>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
new file mode 100644
index 000000000000..24fd83b43d9d
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ model = "SiPeed MAIX GO";
+ compatible = "sipeed,maix-go", "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ led0 {
+ color = <LED_COLOR_ID_GREEN>;
+ label = "green";
+ gpios = <&gpio1_0 4 GPIO_ACTIVE_LOW>;
+ };
+
+ led1 {
+ color = <LED_COLOR_ID_RED>;
+ label = "red";
+ gpios = <&gpio1_0 5 GPIO_ACTIVE_LOW>;
+ };
+
+ led2 {
+ color = <LED_COLOR_ID_BLUE>;
+ label = "blue";
+ gpios = <&gpio1_0 6 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ up {
+ label = "UP";
+ linux,code = <BTN_1>;
+ gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>;
+ };
+
+ press {
+ label = "PRESS";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+ };
+
+ down {
+ label = "DOWN";
+ linux,code = <BTN_2>;
+ gpios = <&gpio0 1 GPIO_ACTIVE_LOW>;
+ };
+ };
+};
+
+&fpioa {
+ pinctrl-0 = <&jtag_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ jtag_pinctrl: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+
+ gpio_pinctrl: gpio-pinmux {
+ pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+ <K210_FPIOA(9, K210_PCF_GPIO1)>,
+ <K210_FPIOA(10, K210_PCF_GPIO2)>,
+ <K210_FPIOA(11, K210_PCF_GPIO3)>,
+ <K210_FPIOA(12, K210_PCF_GPIO4)>,
+ <K210_FPIOA(13, K210_PCF_GPIO5)>,
+ <K210_FPIOA(14, K210_PCF_GPIO6)>,
+ <K210_FPIOA(15, K210_PCF_GPIO7)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+ <K210_FPIOA(17, K210_PCF_GPIOHS1)>,
+ <K210_FPIOA(21, K210_PCF_GPIOHS5)>,
+ <K210_FPIOA(22, K210_PCF_GPIOHS6)>,
+ <K210_FPIOA(23, K210_PCF_GPIOHS7)>,
+ <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+ <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+ <K210_FPIOA(32, K210_PCF_GPIOHS16)>,
+ <K210_FPIOA(33, K210_PCF_GPIOHS17)>,
+ <K210_FPIOA(34, K210_PCF_GPIOHS18)>,
+ <K210_FPIOA(35, K210_PCF_GPIOHS19)>;
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+ <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+ <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(37, K210_PCF_GPIOHS21)>, /* rst */
+ <K210_FPIOA(38, K210_PCF_GPIOHS22)>, /* dc */
+ <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(29, K210_PCF_GPIOHS13)>; /* cs */
+ };
+
+ i2c1_pinctrl: i2c1-pinmux {
+ pinmux = <K210_FPIOA(30, K210_PCF_I2C1_SCLK)>,
+ <K210_FPIOA(31, K210_PCF_I2C1_SDA)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio1 {
+ pinctrl-0 = <&gpio_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pinctrl>;
+ pinctrl-names = "default";
+ clock-frequency = <400000>;
+ status = "okay";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "sitronix,st7789v";
+ reg = <0>;
+ reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+ dc-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>;
+ spi-max-frequency = <15000000>;
+ status = "disabled";
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
+
+&spi3 {
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ spi-tx-bus-width = <4>;
+ spi-rx-bus-width = <4>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
new file mode 100644
index 000000000000..25341f38292a
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+ model = "SiPeed MAIXDUINO";
+ compatible = "sipeed,maixduino", "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ boot {
+ label = "BOOT";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ vcc_3v3: regulator-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+};
+
+&fpioa {
+ status = "okay";
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>, /* Header "0" */
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>; /* Header "1" */
+ };
+
+ gpio_pinctrl: gpio-pinmux {
+ pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+ <K210_FPIOA(9, K210_PCF_GPIO1)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>, /* BOOT */
+ <K210_FPIOA(21, K210_PCF_GPIOHS2)>, /* Header "2" */
+ <K210_FPIOA(22, K210_PCF_GPIOHS3)>, /* Header "3" */
+ <K210_FPIOA(23, K210_PCF_GPIOHS4)>, /* Header "4" */
+ <K210_FPIOA(24, K210_PCF_GPIOHS5)>, /* Header "5" */
+ <K210_FPIOA(32, K210_PCF_GPIOHS6)>, /* Header "6" */
+ <K210_FPIOA(15, K210_PCF_GPIOHS7)>, /* Header "7" */
+ <K210_FPIOA(14, K210_PCF_GPIOHS8)>, /* Header "8" */
+ <K210_FPIOA(13, K210_PCF_GPIOHS9)>, /* Header "9" */
+ <K210_FPIOA(12, K210_PCF_GPIOHS10)>, /* Header "10" */
+ <K210_FPIOA(11, K210_PCF_GPIOHS11)>, /* Header "11" */
+ <K210_FPIOA(10, K210_PCF_GPIOHS12)>, /* Header "12" */
+ <K210_FPIOA(3, K210_PCF_GPIOHS13)>; /* Header "13" */
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+ <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(29, K210_PCF_GPIO2)>; /* cs */
+ };
+
+ i2c1_pinctrl: i2c1-pinmux {
+ pinmux = <K210_FPIOA(30, K210_PCF_I2C1_SCLK)>, /* Header "scl" */
+ <K210_FPIOA(31, K210_PCF_I2C1_SDA)>; /* Header "sda" */
+ };
+
+ i2s1_pinctrl: i2s1-pinmux {
+ pinmux = <K210_FPIOA(33, K210_PCF_I2S1_WS)>,
+ <K210_FPIOA(34, K210_PCF_I2S1_IN_D0)>,
+ <K210_FPIOA(35, K210_PCF_I2S1_SCLK)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(37, K210_PCF_GPIOHS21)>, /* rst */
+ <K210_FPIOA(38, K210_PCF_GPIOHS22)>, /* dc */
+ <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+ <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio1 {
+ pinctrl-0 = <&gpio_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pinctrl>;
+ pinctrl-names = "default";
+ clock-frequency = <400000>;
+ status = "okay";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "sitronix,st7789v";
+ reg = <0>;
+ reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+ dc-gpios = <&gpio0 22 0>;
+ spi-max-frequency = <15000000>;
+ power-supply = <&vcc_3v3>;
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio1_0 2 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
+
+&spi3 {
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ spi-tx-bus-width = <4>;
+ spi-rx-bus-width = <4>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+};
diff --git a/arch/riscv/boot/dts/kendryte/Makefile b/arch/riscv/boot/dts/kendryte/Makefile
deleted file mode 100644
index 1a88e616f18e..000000000000
--- a/arch/riscv/boot/dts/kendryte/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-dtb-$(CONFIG_SOC_KENDRYTE_K210_DTB) += k210.dtb
-
-obj-$(CONFIG_SOC_KENDRYTE_K210_DTB_BUILTIN) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/riscv/boot/dts/kendryte/k210.dts b/arch/riscv/boot/dts/kendryte/k210.dts
deleted file mode 100644
index 0d1f28fce6b2..000000000000
--- a/arch/riscv/boot/dts/kendryte/k210.dts
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2020 Western Digital Corporation or its affiliates.
- */
-
-/dts-v1/;
-
-#include "k210.dtsi"
-
-/ {
- model = "Kendryte K210 generic";
- compatible = "kendryte,k210";
-
- chosen {
- bootargs = "earlycon console=ttySIF0";
- stdout-path = "serial0";
- };
-};
-
-&uarths0 {
- status = "okay";
-};
-
diff --git a/arch/riscv/boot/dts/kendryte/k210.dtsi b/arch/riscv/boot/dts/kendryte/k210.dtsi
deleted file mode 100644
index c1df56ccb8d5..000000000000
--- a/arch/riscv/boot/dts/kendryte/k210.dtsi
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2019 Sean Anderson <seanga2@gmail.com>
- * Copyright (C) 2020 Western Digital Corporation or its affiliates.
- */
-#include <dt-bindings/clock/k210-clk.h>
-
-/ {
- /*
- * Although the K210 is a 64-bit CPU, the address bus is only 32-bits
- * wide, and the upper half of all addresses is ignored.
- */
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "kendryte,k210";
-
- aliases {
- serial0 = &uarths0;
- };
-
- /*
- * The K210 has an sv39 MMU following the priviledge specification v1.9.
- * Since this is a non-ratified draft specification, the kernel does not
- * support it and the K210 support enabled only for the !MMU case.
- * Be consistent with this by setting the CPUs MMU type to "none".
- */
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
- timebase-frequency = <7800000>;
- cpu0: cpu@0 {
- device_type = "cpu";
- reg = <0>;
- compatible = "kendryte,k210", "sifive,rocket0", "riscv";
- riscv,isa = "rv64imafdc";
- mmu-type = "none";
- i-cache-size = <0x8000>;
- i-cache-block-size = <64>;
- d-cache-size = <0x8000>;
- d-cache-block-size = <64>;
- clocks = <&sysctl K210_CLK_CPU>;
- clock-frequency = <390000000>;
- cpu0_intc: interrupt-controller {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "riscv,cpu-intc";
- };
- };
- cpu1: cpu@1 {
- device_type = "cpu";
- reg = <1>;
- compatible = "kendryte,k210", "sifive,rocket0", "riscv";
- riscv,isa = "rv64imafdc";
- mmu-type = "none";
- i-cache-size = <0x8000>;
- i-cache-block-size = <64>;
- d-cache-size = <0x8000>;
- d-cache-block-size = <64>;
- clocks = <&sysctl K210_CLK_CPU>;
- clock-frequency = <390000000>;
- cpu1_intc: interrupt-controller {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "riscv,cpu-intc";
- };
- };
- };
-
- sram: memory@80000000 {
- device_type = "memory";
- reg = <0x80000000 0x400000>,
- <0x80400000 0x200000>,
- <0x80600000 0x200000>;
- reg-names = "sram0", "sram1", "aisram";
- };
-
- clocks {
- in0: oscillator {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <26000000>;
- };
- };
-
- soc {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "kendryte,k210-soc", "simple-bus";
- ranges;
- interrupt-parent = <&plic0>;
-
- sysctl: sysctl@50440000 {
- compatible = "kendryte,k210-sysctl", "simple-mfd";
- reg = <0x50440000 0x1000>;
- #clock-cells = <1>;
- };
-
- clint0: interrupt-controller@2000000 {
- compatible = "riscv,clint0";
- reg = <0x2000000 0xC000>;
- interrupts-extended = <&cpu0_intc 3>, <&cpu1_intc 3>;
- clocks = <&sysctl K210_CLK_ACLK>;
- };
-
- plic0: interrupt-controller@c000000 {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "kendryte,k210-plic0", "riscv,plic0";
- reg = <0xC000000 0x4000000>;
- interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 0xffffffff>,
- <&cpu1_intc 11>, <&cpu1_intc 0xffffffff>;
- riscv,ndev = <65>;
- riscv,max-priority = <7>;
- };
-
- uarths0: serial@38000000 {
- compatible = "kendryte,k210-uarths", "sifive,uart0";
- reg = <0x38000000 0x1000>;
- interrupts = <33>;
- clocks = <&sysctl K210_CLK_CPU>;
- };
- };
-};
diff --git a/arch/riscv/boot/dts/microchip/Makefile b/arch/riscv/boot/dts/microchip/Makefile
new file mode 100644
index 000000000000..39aae7b04f1c
--- /dev/null
+++ b/arch/riscv/boot/dts/microchip/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-icicle-kit.dtb
+dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-polarberry.dtb
+obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi
new file mode 100644
index 000000000000..0d28858b83f2
--- /dev/null
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020-2021 Microchip Technology Inc */
+
+/ {
+ compatible = "microchip,mpfs-icicle-reference-rtlv2203", "microchip,mpfs";
+
+ core_pwm0: pwm@41000000 {
+ compatible = "microchip,corepwm-rtl-v4";
+ reg = <0x0 0x41000000 0x0 0xF0>;
+ microchip,sync-update-mask = /bits/ 32 <0>;
+ #pwm-cells = <2>;
+ clocks = <&fabric_clk3>;
+ status = "disabled";
+ };
+
+ i2c2: i2c@44000000 {
+ compatible = "microchip,corei2c-rtl-v7";
+ reg = <0x0 0x44000000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&fabric_clk3>;
+ interrupt-parent = <&plic>;
+ interrupts = <122>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+
+ fabric_clk3: fabric-clk3 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <62500000>;
+ };
+
+ fabric_clk1: fabric-clk1 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <125000000>;
+ };
+};
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
new file mode 100644
index 000000000000..044982a11df5
--- /dev/null
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020-2021 Microchip Technology Inc */
+
+/dts-v1/;
+
+#include "mpfs.dtsi"
+#include "mpfs-icicle-kit-fabric.dtsi"
+
+/* Clock frequency (in Hz) of the rtcclk */
+#define RTCCLK_FREQ 1000000
+
+/ {
+ model = "Microchip PolarFire-SoC Icicle Kit";
+ compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs";
+
+ aliases {
+ ethernet0 = &mac1;
+ serial0 = &mmuart0;
+ serial1 = &mmuart1;
+ serial2 = &mmuart2;
+ serial3 = &mmuart3;
+ serial4 = &mmuart4;
+ };
+
+ chosen {
+ stdout-path = "serial1:115200n8";
+ };
+
+ cpus {
+ timebase-frequency = <RTCCLK_FREQ>;
+ };
+
+ ddrc_cache_lo: memory@80000000 {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x0 0x2e000000>;
+ status = "okay";
+ };
+
+ ddrc_cache_hi: memory@1000000000 {
+ device_type = "memory";
+ reg = <0x10 0x0 0x0 0x40000000>;
+ status = "okay";
+ };
+};
+
+&core_pwm0 {
+ status = "okay";
+};
+
+&gpio2 {
+ interrupts = <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>;
+ status = "okay";
+};
+
+&i2c0 {
+ status = "okay";
+};
+
+&i2c1 {
+ status = "okay";
+};
+
+&i2c2 {
+ status = "okay";
+};
+
+&mac0 {
+ phy-mode = "sgmii";
+ phy-handle = <&phy0>;
+ status = "okay";
+};
+
+&mac1 {
+ phy-mode = "sgmii";
+ phy-handle = <&phy1>;
+ status = "okay";
+
+ phy1: ethernet-phy@9 {
+ reg = <9>;
+ ti,fifo-depth = <0x1>;
+ };
+
+ phy0: ethernet-phy@8 {
+ reg = <8>;
+ ti,fifo-depth = <0x1>;
+ };
+};
+
+&mbox {
+ status = "okay";
+};
+
+&mmc {
+ bus-width = <4>;
+ disable-wp;
+ cap-sd-highspeed;
+ cap-mmc-highspeed;
+ card-detect-delay = <200>;
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
+ sd-uhs-sdr12;
+ sd-uhs-sdr25;
+ sd-uhs-sdr50;
+ sd-uhs-sdr104;
+ status = "okay";
+};
+
+&mmuart1 {
+ status = "okay";
+};
+
+&mmuart2 {
+ status = "okay";
+};
+
+&mmuart3 {
+ status = "okay";
+};
+
+&mmuart4 {
+ status = "okay";
+};
+
+&pcie {
+ status = "okay";
+};
+
+&qspi {
+ status = "okay";
+};
+
+&refclk {
+ clock-frequency = <125000000>;
+};
+
+&rtc {
+ status = "okay";
+};
+
+&spi0 {
+ status = "okay";
+};
+
+&spi1 {
+ status = "okay";
+};
+
+&syscontroller {
+ status = "okay";
+};
+
+&usb {
+ status = "okay";
+ dr_mode = "host";
+};
diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi
new file mode 100644
index 000000000000..49380c428ec9
--- /dev/null
+++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020-2022 Microchip Technology Inc */
+
+/ {
+ fabric_clk3: fabric-clk3 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <62500000>;
+ };
+
+ fabric_clk1: fabric-clk1 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <125000000>;
+ };
+};
diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
new file mode 100644
index 000000000000..82c93c8f5c17
--- /dev/null
+++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020-2022 Microchip Technology Inc */
+
+/dts-v1/;
+
+#include "mpfs.dtsi"
+#include "mpfs-polarberry-fabric.dtsi"
+
+/* Clock frequency (in Hz) of the rtcclk */
+#define MTIMER_FREQ 1000000
+
+/ {
+ model = "Sundance PolarBerry";
+ compatible = "sundance,polarberry", "microchip,mpfs";
+
+ aliases {
+ ethernet0 = &mac1;
+ serial0 = &mmuart0;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ cpus {
+ timebase-frequency = <MTIMER_FREQ>;
+ };
+
+ ddrc_cache_lo: memory@80000000 {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x0 0x2e000000>;
+ };
+
+ ddrc_cache_hi: memory@1000000000 {
+ device_type = "memory";
+ reg = <0x10 0x00000000 0x0 0xC0000000>;
+ };
+};
+
+/*
+ * phy0 is connected to mac0, but the port itself is on the (optional) carrier
+ * board.
+ */
+&mac0 {
+ phy-mode = "sgmii";
+ phy-handle = <&phy0>;
+ status = "disabled";
+};
+
+&mac1 {
+ phy-mode = "sgmii";
+ phy-handle = <&phy1>;
+ status = "okay";
+
+ phy1: ethernet-phy@5 {
+ reg = <5>;
+ ti,fifo-depth = <0x01>;
+ };
+
+ phy0: ethernet-phy@4 {
+ reg = <4>;
+ ti,fifo-depth = <0x01>;
+ };
+};
+
+&mbox {
+ status = "okay";
+};
+
+&mmc {
+ bus-width = <4>;
+ disable-wp;
+ cap-sd-highspeed;
+ cap-mmc-highspeed;
+ card-detect-delay = <200>;
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
+ sd-uhs-sdr12;
+ sd-uhs-sdr25;
+ sd-uhs-sdr50;
+ sd-uhs-sdr104;
+ status = "okay";
+};
+
+&mmuart0 {
+ status = "okay";
+};
+
+&refclk {
+ clock-frequency = <125000000>;
+};
+
+&rtc {
+ status = "okay";
+};
+
+&syscontroller {
+ status = "okay";
+};
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
new file mode 100644
index 000000000000..3095d08453a1
--- /dev/null
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020-2021 Microchip Technology Inc */
+
+/dts-v1/;
+#include "dt-bindings/clock/microchip,mpfs-clock.h"
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ model = "Microchip PolarFire SoC";
+ compatible = "microchip,mpfs";
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ compatible = "sifive,e51", "sifive,rocket0", "riscv";
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <16384>;
+ reg = <0>;
+ riscv,isa = "rv64imac";
+ clocks = <&clkcfg CLK_CPU>;
+ status = "disabled";
+
+ cpu0_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu1: cpu@1 {
+ compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ reg = <1>;
+ riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
+ tlb-split;
+ status = "okay";
+
+ cpu1_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu2: cpu@2 {
+ compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ reg = <2>;
+ riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
+ tlb-split;
+ status = "okay";
+
+ cpu2_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu3: cpu@3 {
+ compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ reg = <3>;
+ riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
+ tlb-split;
+ status = "okay";
+
+ cpu3_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu4: cpu@4 {
+ compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ reg = <4>;
+ riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
+ tlb-split;
+ status = "okay";
+ cpu4_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ };
+
+ refclk: mssrefclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ };
+
+ syscontroller: syscontroller {
+ compatible = "microchip,mpfs-sys-controller";
+ mboxes = <&mbox 0>;
+ };
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ compatible = "simple-bus";
+ ranges;
+
+ cctrllr: cache-controller@2010000 {
+ compatible = "sifive,fu540-c000-ccache", "cache";
+ reg = <0x0 0x2010000 0x0 0x1000>;
+ cache-block-size = <64>;
+ cache-level = <2>;
+ cache-sets = <1024>;
+ cache-size = <2097152>;
+ cache-unified;
+ interrupt-parent = <&plic>;
+ interrupts = <1>, <2>, <3>;
+ };
+
+ clint: clint@2000000 {
+ compatible = "sifive,fu540-c000-clint", "sifive,clint0";
+ reg = <0x0 0x2000000 0x0 0xC000>;
+ interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
+ <&cpu1_intc 3>, <&cpu1_intc 7>,
+ <&cpu2_intc 3>, <&cpu2_intc 7>,
+ <&cpu3_intc 3>, <&cpu3_intc 7>,
+ <&cpu4_intc 3>, <&cpu4_intc 7>;
+ };
+
+ plic: interrupt-controller@c000000 {
+ compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
+ reg = <0x0 0xc000000 0x0 0x4000000>;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ interrupts-extended = <&cpu0_intc 11>,
+ <&cpu1_intc 11>, <&cpu1_intc 9>,
+ <&cpu2_intc 11>, <&cpu2_intc 9>,
+ <&cpu3_intc 11>, <&cpu3_intc 9>,
+ <&cpu4_intc 11>, <&cpu4_intc 9>;
+ riscv,ndev = <186>;
+ };
+
+ pdma: dma-controller@3000000 {
+ compatible = "sifive,fu540-c000-pdma", "sifive,pdma0";
+ reg = <0x0 0x3000000 0x0 0x8000>;
+ interrupt-parent = <&plic>;
+ interrupts = <5 6>, <7 8>, <9 10>, <11 12>;
+ dma-channels = <4>;
+ #dma-cells = <1>;
+ };
+
+ clkcfg: clkcfg@20002000 {
+ compatible = "microchip,mpfs-clkcfg";
+ reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
+ clocks = <&refclk>;
+ #clock-cells = <1>;
+ };
+
+ mmuart0: serial@20000000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20000000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <90>;
+ current-speed = <115200>;
+ clocks = <&clkcfg CLK_MMUART0>;
+ status = "disabled"; /* Reserved for the HSS */
+ };
+
+ mmuart1: serial@20100000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20100000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <91>;
+ current-speed = <115200>;
+ clocks = <&clkcfg CLK_MMUART1>;
+ status = "disabled";
+ };
+
+ mmuart2: serial@20102000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20102000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <92>;
+ current-speed = <115200>;
+ clocks = <&clkcfg CLK_MMUART2>;
+ status = "disabled";
+ };
+
+ mmuart3: serial@20104000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20104000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <93>;
+ current-speed = <115200>;
+ clocks = <&clkcfg CLK_MMUART3>;
+ status = "disabled";
+ };
+
+ mmuart4: serial@20106000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20106000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <94>;
+ clocks = <&clkcfg CLK_MMUART4>;
+ current-speed = <115200>;
+ status = "disabled";
+ };
+
+ /* Common node entry for emmc/sd */
+ mmc: mmc@20008000 {
+ compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc";
+ reg = <0x0 0x20008000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <88>;
+ clocks = <&clkcfg CLK_MMC>;
+ max-frequency = <200000000>;
+ status = "disabled";
+ };
+
+ spi0: spi@20108000 {
+ compatible = "microchip,mpfs-spi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x20108000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <54>;
+ clocks = <&clkcfg CLK_SPI0>;
+ spi-max-frequency = <25000000>;
+ status = "disabled";
+ };
+
+ spi1: spi@20109000 {
+ compatible = "microchip,mpfs-spi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x20109000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <55>;
+ clocks = <&clkcfg CLK_SPI1>;
+ spi-max-frequency = <25000000>;
+ status = "disabled";
+ };
+
+ qspi: spi@21000000 {
+ compatible = "microchip,mpfs-qspi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x21000000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <85>;
+ clocks = <&clkcfg CLK_QSPI>;
+ spi-max-frequency = <25000000>;
+ status = "disabled";
+ };
+
+ i2c0: i2c@2010a000 {
+ compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7";
+ reg = <0x0 0x2010a000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <&plic>;
+ interrupts = <58>;
+ clocks = <&clkcfg CLK_I2C0>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+
+ i2c1: i2c@2010b000 {
+ compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7";
+ reg = <0x0 0x2010b000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <&plic>;
+ interrupts = <61>;
+ clocks = <&clkcfg CLK_I2C1>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+
+ mac0: ethernet@20110000 {
+ compatible = "cdns,macb";
+ reg = <0x0 0x20110000 0x0 0x2000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <&plic>;
+ interrupts = <64>, <65>, <66>, <67>, <68>, <69>;
+ local-mac-address = [00 00 00 00 00 00];
+ clocks = <&clkcfg CLK_MAC0>, <&clkcfg CLK_AHB>;
+ clock-names = "pclk", "hclk";
+ status = "disabled";
+ };
+
+ mac1: ethernet@20112000 {
+ compatible = "cdns,macb";
+ reg = <0x0 0x20112000 0x0 0x2000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <&plic>;
+ interrupts = <70>, <71>, <72>, <73>, <74>, <75>;
+ local-mac-address = [00 00 00 00 00 00];
+ clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>;
+ clock-names = "pclk", "hclk";
+ status = "disabled";
+ };
+
+ gpio0: gpio@20120000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <0x0 0x20120000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clocks = <&clkcfg CLK_GPIO0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ status = "disabled";
+ };
+
+ gpio1: gpio@20121000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <0x0 0x20121000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clocks = <&clkcfg CLK_GPIO1>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ status = "disabled";
+ };
+
+ gpio2: gpio@20122000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <0x0 0x20122000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clocks = <&clkcfg CLK_GPIO2>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ status = "disabled";
+ };
+
+ rtc: rtc@20124000 {
+ compatible = "microchip,mpfs-rtc";
+ reg = <0x0 0x20124000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <80>, <81>;
+ clocks = <&clkcfg CLK_RTC>, <&clkcfg CLK_RTCREF>;
+ clock-names = "rtc", "rtcref";
+ status = "disabled";
+ };
+
+ usb: usb@20201000 {
+ compatible = "microchip,mpfs-musb";
+ reg = <0x0 0x20201000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <86>, <87>;
+ clocks = <&clkcfg CLK_USB>;
+ interrupt-names = "dma","mc";
+ status = "disabled";
+ };
+
+ pcie: pcie@2000000000 {
+ compatible = "microchip,pcie-host-1.0";
+ #address-cells = <0x3>;
+ #interrupt-cells = <0x1>;
+ #size-cells = <0x2>;
+ device_type = "pci";
+ reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>;
+ reg-names = "cfg", "apb";
+ bus-range = <0x0 0x7f>;
+ interrupt-parent = <&plic>;
+ interrupts = <119>;
+ interrupt-map = <0 0 0 1 &pcie_intc 0>,
+ <0 0 0 2 &pcie_intc 1>,
+ <0 0 0 3 &pcie_intc 2>,
+ <0 0 0 4 &pcie_intc 3>;
+ interrupt-map-mask = <0 0 0 7>;
+ clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>;
+ clock-names = "fic0", "fic1", "fic3";
+ ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>;
+ msi-parent = <&pcie>;
+ msi-controller;
+ microchip,axi-m-atr0 = <0x10 0x0>;
+ status = "disabled";
+ pcie_intc: legacy-interrupt-controller {
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ };
+ };
+
+ mbox: mailbox@37020000 {
+ compatible = "microchip,mpfs-mailbox";
+ reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318C 0x0 0x40>;
+ interrupt-parent = <&plic>;
+ interrupts = <96>;
+ #mbox-cells = <1>;
+ status = "disabled";
+ };
+ };
+};
diff --git a/arch/riscv/boot/dts/sifive/Makefile b/arch/riscv/boot/dts/sifive/Makefile
index 6d6189e6e4af..d90e4eb0ade8 100644
--- a/arch/riscv/boot/dts/sifive/Makefile
+++ b/arch/riscv/boot/dts/sifive/Makefile
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb
+dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb \
+ hifive-unmatched-a00.dtb
+obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 7db861053483..e3172d0ffac4 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -137,20 +137,21 @@
soc {
#address-cells = <2>;
#size-cells = <2>;
- compatible = "sifive,fu540-c000", "sifive,fu540", "simple-bus";
+ compatible = "simple-bus";
ranges;
plic0: interrupt-controller@c000000 {
- #interrupt-cells = <1>;
- compatible = "sifive,plic-1.0.0";
+ compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
reg = <0x0 0xc000000 0x0 0x4000000>;
- riscv,ndev = <53>;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
interrupt-controller;
- interrupts-extended = <
- &cpu0_intc 0xffffffff
- &cpu1_intc 0xffffffff &cpu1_intc 9
- &cpu2_intc 0xffffffff &cpu2_intc 9
- &cpu3_intc 0xffffffff &cpu3_intc 9
- &cpu4_intc 0xffffffff &cpu4_intc 9>;
+ interrupts-extended =
+ <&cpu0_intc 0xffffffff>,
+ <&cpu1_intc 0xffffffff>, <&cpu1_intc 9>,
+ <&cpu2_intc 0xffffffff>, <&cpu2_intc 9>,
+ <&cpu3_intc 0xffffffff>, <&cpu3_intc 9>,
+ <&cpu4_intc 0xffffffff>, <&cpu4_intc 9>;
+ riscv,ndev = <53>;
};
prci: clock-controller@10000000 {
compatible = "sifive,fu540-c000-prci";
@@ -163,14 +164,16 @@
reg = <0x0 0x10010000 0x0 0x1000>;
interrupt-parent = <&plic0>;
interrupts = <4>;
- clocks = <&prci PRCI_CLK_TLCLK>;
+ clocks = <&prci FU540_PRCI_CLK_TLCLK>;
status = "disabled";
};
- dma: dma@3000000 {
- compatible = "sifive,fu540-c000-pdma";
+ dma: dma-controller@3000000 {
+ compatible = "sifive,fu540-c000-pdma", "sifive,pdma0";
reg = <0x0 0x3000000 0x0 0x8000>;
interrupt-parent = <&plic0>;
- interrupts = <23 24 25 26 27 28 29 30>;
+ interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
+ <30>;
+ dma-channels = <4>;
#dma-cells = <1>;
};
uart1: serial@10011000 {
@@ -178,7 +181,7 @@
reg = <0x0 0x10011000 0x0 0x1000>;
interrupt-parent = <&plic0>;
interrupts = <5>;
- clocks = <&prci PRCI_CLK_TLCLK>;
+ clocks = <&prci FU540_PRCI_CLK_TLCLK>;
status = "disabled";
};
i2c0: i2c@10030000 {
@@ -186,7 +189,7 @@
reg = <0x0 0x10030000 0x0 0x1000>;
interrupt-parent = <&plic0>;
interrupts = <50>;
- clocks = <&prci PRCI_CLK_TLCLK>;
+ clocks = <&prci FU540_PRCI_CLK_TLCLK>;
reg-shift = <2>;
reg-io-width = <1>;
#address-cells = <1>;
@@ -195,22 +198,22 @@
};
qspi0: spi@10040000 {
compatible = "sifive,fu540-c000-spi", "sifive,spi0";
- reg = <0x0 0x10040000 0x0 0x1000
- 0x0 0x20000000 0x0 0x10000000>;
+ reg = <0x0 0x10040000 0x0 0x1000>,
+ <0x0 0x20000000 0x0 0x10000000>;
interrupt-parent = <&plic0>;
interrupts = <51>;
- clocks = <&prci PRCI_CLK_TLCLK>;
+ clocks = <&prci FU540_PRCI_CLK_TLCLK>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
};
qspi1: spi@10041000 {
compatible = "sifive,fu540-c000-spi", "sifive,spi0";
- reg = <0x0 0x10041000 0x0 0x1000
- 0x0 0x30000000 0x0 0x10000000>;
+ reg = <0x0 0x10041000 0x0 0x1000>,
+ <0x0 0x30000000 0x0 0x10000000>;
interrupt-parent = <&plic0>;
interrupts = <52>;
- clocks = <&prci PRCI_CLK_TLCLK>;
+ clocks = <&prci FU540_PRCI_CLK_TLCLK>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -220,7 +223,7 @@
reg = <0x0 0x10050000 0x0 0x1000>;
interrupt-parent = <&plic0>;
interrupts = <6>;
- clocks = <&prci PRCI_CLK_TLCLK>;
+ clocks = <&prci FU540_PRCI_CLK_TLCLK>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -229,12 +232,12 @@
compatible = "sifive,fu540-c000-gem";
interrupt-parent = <&plic0>;
interrupts = <53>;
- reg = <0x0 0x10090000 0x0 0x2000
- 0x0 0x100a0000 0x0 0x1000>;
+ reg = <0x0 0x10090000 0x0 0x2000>,
+ <0x0 0x100a0000 0x0 0x1000>;
local-mac-address = [00 00 00 00 00 00];
clock-names = "pclk", "hclk";
- clocks = <&prci PRCI_CLK_GEMGXLPLL>,
- <&prci PRCI_CLK_GEMGXLPLL>;
+ clocks = <&prci FU540_PRCI_CLK_GEMGXLPLL>,
+ <&prci FU540_PRCI_CLK_GEMGXLPLL>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -243,8 +246,8 @@
compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
reg = <0x0 0x10020000 0x0 0x1000>;
interrupt-parent = <&plic0>;
- interrupts = <42 43 44 45>;
- clocks = <&prci PRCI_CLK_TLCLK>;
+ interrupts = <42>, <43>, <44>, <45>;
+ clocks = <&prci FU540_PRCI_CLK_TLCLK>;
#pwm-cells = <3>;
status = "disabled";
};
@@ -252,8 +255,8 @@
compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
reg = <0x0 0x10021000 0x0 0x1000>;
interrupt-parent = <&plic0>;
- interrupts = <46 47 48 49>;
- clocks = <&prci PRCI_CLK_TLCLK>;
+ interrupts = <46>, <47>, <48>, <49>;
+ clocks = <&prci FU540_PRCI_CLK_TLCLK>;
#pwm-cells = <3>;
status = "disabled";
};
@@ -265,7 +268,7 @@
cache-size = <2097152>;
cache-unified;
interrupt-parent = <&plic0>;
- interrupts = <1 2 3>;
+ interrupts = <1>, <2>, <3>;
reg = <0x0 0x2010000 0x0 0x1000>;
};
gpio: gpio@10060000 {
@@ -279,7 +282,7 @@
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <2>;
- clocks = <&prci PRCI_CLK_TLCLK>;
+ clocks = <&prci FU540_PRCI_CLK_TLCLK>;
status = "disabled";
};
};
diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
new file mode 100644
index 000000000000..7b77c13496d8
--- /dev/null
+++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020 SiFive, Inc */
+
+/dts-v1/;
+
+#include <dt-bindings/clock/sifive-fu740-prci.h>
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ compatible = "sifive,fu740-c000", "sifive,fu740";
+
+ aliases {
+ serial0 = &uart0;
+ serial1 = &uart1;
+ ethernet0 = &eth0;
+ };
+
+ chosen {
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cpu0: cpu@0 {
+ compatible = "sifive,bullet0", "riscv";
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <16384>;
+ next-level-cache = <&ccache>;
+ reg = <0x0>;
+ riscv,isa = "rv64imac";
+ status = "disabled";
+ cpu0_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ cpu1: cpu@1 {
+ compatible = "sifive,bullet0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <40>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <40>;
+ mmu-type = "riscv,sv39";
+ next-level-cache = <&ccache>;
+ reg = <0x1>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ cpu1_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ cpu2: cpu@2 {
+ compatible = "sifive,bullet0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <40>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <40>;
+ mmu-type = "riscv,sv39";
+ next-level-cache = <&ccache>;
+ reg = <0x2>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ cpu2_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ cpu3: cpu@3 {
+ compatible = "sifive,bullet0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <40>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <40>;
+ mmu-type = "riscv,sv39";
+ next-level-cache = <&ccache>;
+ reg = <0x3>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ cpu3_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ cpu4: cpu@4 {
+ compatible = "sifive,bullet0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <40>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <40>;
+ mmu-type = "riscv,sv39";
+ next-level-cache = <&ccache>;
+ reg = <0x4>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ cpu4_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ };
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ compatible = "simple-bus";
+ ranges;
+ plic0: interrupt-controller@c000000 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
+ reg = <0x0 0xc000000 0x0 0x4000000>;
+ riscv,ndev = <69>;
+ interrupt-controller;
+ interrupts-extended =
+ <&cpu0_intc 0xffffffff>,
+ <&cpu1_intc 0xffffffff>, <&cpu1_intc 9>,
+ <&cpu2_intc 0xffffffff>, <&cpu2_intc 9>,
+ <&cpu3_intc 0xffffffff>, <&cpu3_intc 9>,
+ <&cpu4_intc 0xffffffff>, <&cpu4_intc 9>;
+ };
+ prci: clock-controller@10000000 {
+ compatible = "sifive,fu740-c000-prci";
+ reg = <0x0 0x10000000 0x0 0x1000>;
+ clocks = <&hfclk>, <&rtcclk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+ uart0: serial@10010000 {
+ compatible = "sifive,fu740-c000-uart", "sifive,uart0";
+ reg = <0x0 0x10010000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <39>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ status = "disabled";
+ };
+ uart1: serial@10011000 {
+ compatible = "sifive,fu740-c000-uart", "sifive,uart0";
+ reg = <0x0 0x10011000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <40>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ status = "disabled";
+ };
+ i2c0: i2c@10030000 {
+ compatible = "sifive,fu740-c000-i2c", "sifive,i2c0";
+ reg = <0x0 0x10030000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <52>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ reg-shift = <2>;
+ reg-io-width = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ i2c1: i2c@10031000 {
+ compatible = "sifive,fu740-c000-i2c", "sifive,i2c0";
+ reg = <0x0 0x10031000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <53>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ reg-shift = <2>;
+ reg-io-width = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ qspi0: spi@10040000 {
+ compatible = "sifive,fu740-c000-spi", "sifive,spi0";
+ reg = <0x0 0x10040000 0x0 0x1000>,
+ <0x0 0x20000000 0x0 0x10000000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <41>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ qspi1: spi@10041000 {
+ compatible = "sifive,fu740-c000-spi", "sifive,spi0";
+ reg = <0x0 0x10041000 0x0 0x1000>,
+ <0x0 0x30000000 0x0 0x10000000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <42>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ spi0: spi@10050000 {
+ compatible = "sifive,fu740-c000-spi", "sifive,spi0";
+ reg = <0x0 0x10050000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <43>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ eth0: ethernet@10090000 {
+ compatible = "sifive,fu540-c000-gem";
+ interrupt-parent = <&plic0>;
+ interrupts = <55>;
+ reg = <0x0 0x10090000 0x0 0x2000>,
+ <0x0 0x100a0000 0x0 0x1000>;
+ local-mac-address = [00 00 00 00 00 00];
+ clock-names = "pclk", "hclk";
+ clocks = <&prci FU740_PRCI_CLK_GEMGXLPLL>,
+ <&prci FU740_PRCI_CLK_GEMGXLPLL>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ pwm0: pwm@10020000 {
+ compatible = "sifive,fu740-c000-pwm", "sifive,pwm0";
+ reg = <0x0 0x10020000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <44>, <45>, <46>, <47>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+ pwm1: pwm@10021000 {
+ compatible = "sifive,fu740-c000-pwm", "sifive,pwm0";
+ reg = <0x0 0x10021000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <48>, <49>, <50>, <51>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+ ccache: cache-controller@2010000 {
+ compatible = "sifive,fu740-c000-ccache", "cache";
+ cache-block-size = <64>;
+ cache-level = <2>;
+ cache-sets = <2048>;
+ cache-size = <2097152>;
+ cache-unified;
+ interrupt-parent = <&plic0>;
+ interrupts = <19>, <21>, <22>, <20>;
+ reg = <0x0 0x2010000 0x0 0x1000>;
+ };
+ gpio: gpio@10060000 {
+ compatible = "sifive,fu740-c000-gpio", "sifive,gpio0";
+ interrupt-parent = <&plic0>;
+ interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
+ <30>, <31>, <32>, <33>, <34>, <35>, <36>,
+ <37>, <38>;
+ reg = <0x0 0x10060000 0x0 0x1000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&prci FU740_PRCI_CLK_PCLK>;
+ status = "disabled";
+ };
+ pcie@e00000000 {
+ compatible = "sifive,fu740-pcie";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ reg = <0xe 0x00000000 0x0 0x80000000>,
+ <0xd 0xf0000000 0x0 0x10000000>,
+ <0x0 0x100d0000 0x0 0x1000>;
+ reg-names = "dbi", "config", "mgmt";
+ device_type = "pci";
+ dma-coherent;
+ bus-range = <0x0 0xff>;
+ ranges = <0x81000000 0x0 0x60080000 0x0 0x60080000 0x0 0x10000>, /* I/O */
+ <0x82000000 0x0 0x60090000 0x0 0x60090000 0x0 0xff70000>, /* mem */
+ <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x1000000>, /* mem */
+ <0xc3000000 0x20 0x00000000 0x20 0x00000000 0x20 0x00000000>; /* mem prefetchable */
+ num-lanes = <0x8>;
+ interrupts = <56>, <57>, <58>, <59>, <60>, <61>, <62>, <63>, <64>;
+ interrupt-names = "msi", "inta", "intb", "intc", "intd";
+ interrupt-parent = <&plic0>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+ interrupt-map = <0x0 0x0 0x0 0x1 &plic0 57>,
+ <0x0 0x0 0x0 0x2 &plic0 58>,
+ <0x0 0x0 0x0 0x3 &plic0 59>,
+ <0x0 0x0 0x0 0x4 &plic0 60>;
+ clock-names = "pcie_aux";
+ clocks = <&prci FU740_PRCI_CLK_PCIE_AUX>;
+ pwren-gpios = <&gpio 5 0>;
+ reset-gpios = <&gpio 8 0>;
+ resets = <&prci 4>;
+ status = "okay";
+ };
+ };
+};
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 4a2729f5ca3f..ced0d4e47938 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -8,10 +8,9 @@
#define RTCCLK_FREQ 1000000
/ {
- #address-cells = <2>;
- #size-cells = <2>;
model = "SiFive HiFive Unleashed A00";
- compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000";
+ compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000",
+ "sifive,fu540";
chosen {
stdout-path = "serial0";
@@ -26,9 +25,6 @@
reg = <0x0 0x80000000 0x2 0x00000000>;
};
- soc {
- };
-
hfclk: hfclk {
#clock-cells = <0>;
compatible = "fixed-clock";
@@ -63,7 +59,7 @@
&qspi0 {
status = "okay";
flash@0 {
- compatible = "issi,is25wp256", "jedec,spi-nor";
+ compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
m25p,fast-read;
@@ -80,6 +76,7 @@
spi-max-frequency = <20000000>;
voltage-ranges = <3300 3300>;
disable-wp;
+ gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
};
};
@@ -88,6 +85,7 @@
phy-mode = "gmii";
phy-handle = <&phy0>;
phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-id0007.0771";
reg = <0>;
};
};
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
new file mode 100644
index 000000000000..c4ed9efdff03
--- /dev/null
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020 SiFive, Inc */
+
+#include "fu740-c000.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/* Clock frequency (in Hz) of the PCB crystal for rtcclk */
+#define RTCCLK_FREQ 1000000
+
+/ {
+ model = "SiFive HiFive Unmatched A00";
+ compatible = "sifive,hifive-unmatched-a00", "sifive,fu740-c000",
+ "sifive,fu740";
+
+ chosen {
+ stdout-path = "serial0";
+ };
+
+ cpus {
+ timebase-frequency = <RTCCLK_FREQ>;
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x4 0x00000000>;
+ };
+
+ hfclk: hfclk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <26000000>;
+ clock-output-names = "hfclk";
+ };
+
+ rtcclk: rtcclk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <RTCCLK_FREQ>;
+ clock-output-names = "rtcclk";
+ };
+
+ gpio-poweroff {
+ compatible = "gpio-poweroff";
+ gpios = <&gpio 2 GPIO_ACTIVE_LOW>;
+ };
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&uart1 {
+ status = "okay";
+};
+
+&i2c0 {
+ status = "okay";
+
+ temperature-sensor@4c {
+ compatible = "ti,tmp451";
+ reg = <0x4c>;
+ vcc-supply = <&vdd_bpro>;
+ interrupt-parent = <&gpio>;
+ interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
+ };
+
+ eeprom@54 {
+ compatible = "microchip,24c02", "atmel,24c02";
+ reg = <0x54>;
+ vcc-supply = <&vdd_bpro>;
+ label = "board-id";
+ pagesize = <16>;
+ read-only;
+ size = <256>;
+ };
+
+ pmic@58 {
+ compatible = "dlg,da9063";
+ reg = <0x58>;
+ interrupt-parent = <&gpio>;
+ interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-controller;
+
+ onkey {
+ compatible = "dlg,da9063-onkey";
+ };
+
+ rtc {
+ compatible = "dlg,da9063-rtc";
+ };
+
+ wdt {
+ compatible = "dlg,da9063-watchdog";
+ };
+
+ regulators {
+ vdd_bcore: bcores-merged {
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-min-microamp = <4800000>;
+ regulator-max-microamp = <4800000>;
+ regulator-always-on;
+ };
+
+ vdd_bpro: bpro {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-min-microamp = <2400000>;
+ regulator-max-microamp = <2400000>;
+ regulator-always-on;
+ };
+
+ vdd_bperi: bperi {
+ regulator-min-microvolt = <1060000>;
+ regulator-max-microvolt = <1060000>;
+ regulator-min-microamp = <1500000>;
+ regulator-max-microamp = <1500000>;
+ regulator-always-on;
+ };
+
+ vdd_bmem_bio: bmem-bio-merged {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-min-microamp = <3000000>;
+ regulator-max-microamp = <3000000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo1: ldo1 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo2: ldo2 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo3: ldo3 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo4: ldo4 {
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <2500000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo5: ldo5 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo6: ldo6 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo7: ldo7 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo8: ldo8 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vdd_ld09: ldo9 {
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo10: ldo10 {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo11: ldo11 {
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <2500000>;
+ regulator-always-on;
+ };
+ };
+ };
+};
+
+&qspi0 {
+ status = "okay";
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ m25p,fast-read;
+ spi-tx-bus-width = <4>;
+ spi-rx-bus-width = <4>;
+ };
+};
+
+&spi0 {
+ status = "okay";
+ mmc@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ spi-max-frequency = <20000000>;
+ voltage-ranges = <3300 3300>;
+ disable-wp;
+ gpios = <&gpio 15 GPIO_ACTIVE_LOW>;
+ };
+};
+
+&eth0 {
+ status = "okay";
+ phy-mode = "gmii";
+ phy-handle = <&phy0>;
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+};
+
+&pwm0 {
+ status = "okay";
+};
+
+&pwm1 {
+ status = "okay";
+};
+
+&gpio {
+ status = "okay";
+ gpio-line-names = "J29.1", "PMICNTB", "PMICSHDN", "J8.1", "J8.3",
+ "PCIe_PWREN", "THERM", "UBRDG_RSTN", "PCIe_PERSTN",
+ "ULPI_RSTN", "J8.2", "UHUB_RSTN", "GEMGXL_RST", "J8.4",
+ "EN_VDD_SD", "SD_CD";
+};
diff --git a/arch/riscv/boot/dts/starfive/Makefile b/arch/riscv/boot/dts/starfive/Makefile
new file mode 100644
index 000000000000..0ea1bc15ab30
--- /dev/null
+++ b/arch/riscv/boot/dts/starfive/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_SOC_STARFIVE) += jh7100-beaglev-starlight.dtb
diff --git a/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
new file mode 100644
index 000000000000..c9af67f7a0d2
--- /dev/null
+++ b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2021 StarFive Technology Co., Ltd.
+ * Copyright (C) 2021 Emil Renner Berthing <kernel@esmil.dk>
+ */
+
+/dts-v1/;
+#include "jh7100.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/pinctrl/pinctrl-starfive.h>
+
+/ {
+ model = "BeagleV Starlight Beta";
+ compatible = "beagle,beaglev-starlight-jh7100-r0", "starfive,jh7100";
+
+ aliases {
+ serial0 = &uart3;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ cpus {
+ timebase-frequency = <6250000>;
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x2 0x0>;
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-ack {
+ gpios = <&gpio 43 GPIO_ACTIVE_HIGH>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_HEARTBEAT;
+ linux,default-trigger = "heartbeat";
+ label = "ack";
+ };
+ };
+};
+
+&gpio {
+ i2c0_pins: i2c0-0 {
+ i2c-pins {
+ pinmux = <GPIOMUX(62, GPO_LOW,
+ GPO_I2C0_PAD_SCK_OEN,
+ GPI_I2C0_PAD_SCK_IN)>,
+ <GPIOMUX(61, GPO_LOW,
+ GPO_I2C0_PAD_SDA_OEN,
+ GPI_I2C0_PAD_SDA_IN)>;
+ bias-disable; /* external pull-up */
+ input-enable;
+ input-schmitt-enable;
+ };
+ };
+
+ i2c1_pins: i2c1-0 {
+ i2c-pins {
+ pinmux = <GPIOMUX(47, GPO_LOW,
+ GPO_I2C1_PAD_SCK_OEN,
+ GPI_I2C1_PAD_SCK_IN)>,
+ <GPIOMUX(48, GPO_LOW,
+ GPO_I2C1_PAD_SDA_OEN,
+ GPI_I2C1_PAD_SDA_IN)>;
+ bias-pull-up;
+ input-enable;
+ input-schmitt-enable;
+ };
+ };
+
+ i2c2_pins: i2c2-0 {
+ i2c-pins {
+ pinmux = <GPIOMUX(60, GPO_LOW,
+ GPO_I2C2_PAD_SCK_OEN,
+ GPI_I2C2_PAD_SCK_IN)>,
+ <GPIOMUX(59, GPO_LOW,
+ GPO_I2C2_PAD_SDA_OEN,
+ GPI_I2C2_PAD_SDA_IN)>;
+ bias-disable; /* external pull-up */
+ input-enable;
+ input-schmitt-enable;
+ };
+ };
+
+ uart3_pins: uart3-0 {
+ rx-pins {
+ pinmux = <GPIOMUX(13, GPO_LOW, GPO_DISABLE,
+ GPI_UART3_PAD_SIN)>;
+ bias-pull-up;
+ drive-strength = <14>;
+ input-enable;
+ input-schmitt-enable;
+ slew-rate = <0>;
+ };
+ tx-pins {
+ pinmux = <GPIOMUX(14, GPO_UART3_PAD_SOUT,
+ GPO_ENABLE, GPI_NONE)>;
+ bias-disable;
+ drive-strength = <35>;
+ input-disable;
+ input-schmitt-disable;
+ slew-rate = <0>;
+ };
+ };
+};
+
+&i2c0 {
+ clock-frequency = <100000>;
+ i2c-sda-hold-time-ns = <300>;
+ i2c-sda-falling-time-ns = <500>;
+ i2c-scl-falling-time-ns = <500>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_pins>;
+ status = "okay";
+
+ pmic@5e {
+ compatible = "ti,tps65086";
+ reg = <0x5e>;
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ regulators {
+ };
+ };
+};
+
+&i2c1 {
+ clock-frequency = <400000>;
+ i2c-sda-hold-time-ns = <300>;
+ i2c-sda-falling-time-ns = <100>;
+ i2c-scl-falling-time-ns = <100>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_pins>;
+ status = "okay";
+};
+
+&i2c2 {
+ clock-frequency = <100000>;
+ i2c-sda-hold-time-ns = <300>;
+ i2c-sda-falling-time-ns = <500>;
+ i2c-scl-falling-time-ns = <500>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c2_pins>;
+ status = "okay";
+};
+
+&osc_sys {
+ clock-frequency = <25000000>;
+};
+
+&osc_aud {
+ clock-frequency = <27000000>;
+};
+
+&uart3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart3_pins>;
+ status = "okay";
+};
diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi
new file mode 100644
index 000000000000..69f22f9aad9d
--- /dev/null
+++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2021 StarFive Technology Co., Ltd.
+ * Copyright (C) 2021 Emil Renner Berthing <kernel@esmil.dk>
+ */
+
+/dts-v1/;
+#include <dt-bindings/clock/starfive-jh7100.h>
+#include <dt-bindings/reset/starfive-jh7100.h>
+
+/ {
+ compatible = "starfive,jh7100";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "sifive,u74-mc", "riscv";
+ reg = <0>;
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+
+ cpu0_intc: interrupt-controller {
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+ };
+
+ cpu@1 {
+ compatible = "sifive,u74-mc", "riscv";
+ reg = <1>;
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+
+ cpu1_intc: interrupt-controller {
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+ };
+ };
+
+ osc_sys: osc_sys {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* This value must be overridden by the board */
+ clock-frequency = <0>;
+ };
+
+ osc_aud: osc_aud {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* This value must be overridden by the board */
+ clock-frequency = <0>;
+ };
+
+ gmac_rmii_ref: gmac_rmii_ref {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* Should be overridden by the board when needed */
+ clock-frequency = <0>;
+ };
+
+ gmac_gr_mii_rxclk: gmac_gr_mii_rxclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* Should be overridden by the board when needed */
+ clock-frequency = <0>;
+ };
+
+ soc {
+ compatible = "simple-bus";
+ interrupt-parent = <&plic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ clint: clint@2000000 {
+ compatible = "starfive,jh7100-clint", "sifive,clint0";
+ reg = <0x0 0x2000000 0x0 0x10000>;
+ interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
+ &cpu1_intc 3 &cpu1_intc 7>;
+ };
+
+ plic: interrupt-controller@c000000 {
+ compatible = "starfive,jh7100-plic", "sifive,plic-1.0.0";
+ reg = <0x0 0xc000000 0x0 0x4000000>;
+ interrupts-extended = <&cpu0_intc 11 &cpu0_intc 9
+ &cpu1_intc 11 &cpu1_intc 9>;
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ riscv,ndev = <127>;
+ };
+
+ clkgen: clock-controller@11800000 {
+ compatible = "starfive,jh7100-clkgen";
+ reg = <0x0 0x11800000 0x0 0x10000>;
+ clocks = <&osc_sys>, <&osc_aud>, <&gmac_rmii_ref>, <&gmac_gr_mii_rxclk>;
+ clock-names = "osc_sys", "osc_aud", "gmac_rmii_ref", "gmac_gr_mii_rxclk";
+ #clock-cells = <1>;
+ };
+
+ rstgen: reset-controller@11840000 {
+ compatible = "starfive,jh7100-reset";
+ reg = <0x0 0x11840000 0x0 0x10000>;
+ #reset-cells = <1>;
+ };
+
+ i2c0: i2c@118b0000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x118b0000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_I2C0_CORE>,
+ <&clkgen JH7100_CLK_I2C0_APB>;
+ clock-names = "ref", "pclk";
+ resets = <&rstgen JH7100_RSTN_I2C0_APB>;
+ interrupts = <96>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ i2c1: i2c@118c0000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x118c0000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_I2C1_CORE>,
+ <&clkgen JH7100_CLK_I2C1_APB>;
+ clock-names = "ref", "pclk";
+ resets = <&rstgen JH7100_RSTN_I2C1_APB>;
+ interrupts = <97>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ gpio: pinctrl@11910000 {
+ compatible = "starfive,jh7100-pinctrl";
+ reg = <0x0 0x11910000 0x0 0x10000>,
+ <0x0 0x11858000 0x0 0x1000>;
+ reg-names = "gpio", "padctl";
+ clocks = <&clkgen JH7100_CLK_GPIO_APB>;
+ resets = <&rstgen JH7100_RSTN_GPIO_APB>;
+ interrupts = <32>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ uart2: serial@12430000 {
+ compatible = "starfive,jh7100-uart", "snps,dw-apb-uart";
+ reg = <0x0 0x12430000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_UART2_CORE>,
+ <&clkgen JH7100_CLK_UART2_APB>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&rstgen JH7100_RSTN_UART2_APB>;
+ interrupts = <72>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ status = "disabled";
+ };
+
+ uart3: serial@12440000 {
+ compatible = "starfive,jh7100-uart", "snps,dw-apb-uart";
+ reg = <0x0 0x12440000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_UART3_CORE>,
+ <&clkgen JH7100_CLK_UART3_APB>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&rstgen JH7100_RSTN_UART3_APB>;
+ interrupts = <73>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ status = "disabled";
+ };
+
+ i2c2: i2c@12450000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x12450000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_I2C2_CORE>,
+ <&clkgen JH7100_CLK_I2C2_APB>;
+ clock-names = "ref", "pclk";
+ resets = <&rstgen JH7100_RSTN_I2C2_APB>;
+ interrupts = <74>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ i2c3: i2c@12460000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x12460000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_I2C3_CORE>,
+ <&clkgen JH7100_CLK_I2C3_APB>;
+ clock-names = "ref", "pclk";
+ resets = <&rstgen JH7100_RSTN_I2C3_APB>;
+ interrupts = <75>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ };
+};
diff --git a/arch/riscv/boot/install.sh b/arch/riscv/boot/install.sh
index 18c39159c0ff..4c63f3f0643d 100644..100755
--- a/arch/riscv/boot/install.sh
+++ b/arch/riscv/boot/install.sh
@@ -1,7 +1,5 @@
#!/bin/sh
#
-# arch/riscv/boot/install.sh
-#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
@@ -18,25 +16,6 @@
# $2 - kernel image file
# $3 - kernel map file
# $4 - default install path (blank if root directory)
-#
-
-verify () {
- if [ ! -f "$1" ]; then
- echo "" 1>&2
- echo " *** Missing file: $1" 1>&2
- echo ' *** You need to run "make" before "make install".' 1>&2
- echo "" 1>&2
- exit 1
- fi
-}
-
-# Make sure the files actually exist
-verify "$2"
-verify "$3"
-
-# User may have a custom install script
-if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
-if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
if [ "$(basename $2)" = "Image.gz" ]; then
# Compressed install
diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
index 47a5003c2e28..62d94696a19c 100644
--- a/arch/riscv/boot/loader.lds.S
+++ b/arch/riscv/boot/loader.lds.S
@@ -1,13 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/page.h>
+#include <asm/pgtable.h>
OUTPUT_ARCH(riscv)
ENTRY(_start)
SECTIONS
{
- . = PAGE_OFFSET;
+ . = KERNEL_LINK_ADDR;
.payload : {
*(.payload)
diff --git a/arch/riscv/configs/32-bit.config b/arch/riscv/configs/32-bit.config
new file mode 100644
index 000000000000..43f41323b67e
--- /dev/null
+++ b/arch/riscv/configs/32-bit.config
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV32I=y
+CONFIG_32BIT=y
diff --git a/arch/riscv/configs/64-bit.config b/arch/riscv/configs/64-bit.config
new file mode 100644
index 000000000000..313edc554d84
--- /dev/null
+++ b/arch/riscv/configs/64-bit.config
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_64BIT=y
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 4da4886246a4..0cc17db8aaba 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -2,6 +2,7 @@ CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
@@ -13,10 +14,18 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
-CONFIG_BPF_SYSCALL=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_PROFILING=y
+CONFIG_SOC_MICROCHIP_POLARFIRE=y
CONFIG_SOC_SIFIVE=y
CONFIG_SOC_VIRT=y
CONFIG_SMP=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PM=y
+CONFIG_CPU_IDLE=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_NET=y
@@ -36,10 +45,12 @@ CONFIG_PCI=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_HOST_GENERIC=y
CONFIG_PCIE_XILINX=y
+CONFIG_PCIE_FU740=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_NVME=m
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_SCSI_VIRTIO=y
@@ -56,18 +67,19 @@ CONFIG_INPUT_MOUSEDEV=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
-CONFIG_HVC_RISCV_SBI=y
CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_SPI=y
CONFIG_SPI_SIFIVE=y
# CONFIG_PTP_1588_CLOCK is not set
-CONFIG_POWER_RESET=y
-CONFIG_DRM=y
-CONFIG_DRM_RADEON=y
-CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SIFIVE=y
+CONFIG_DRM=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_NOUVEAU=m
+CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
@@ -79,6 +91,9 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_CADENCE=y
CONFIG_MMC_SPI=y
CONFIG_RTC_CLASS=y
CONFIG_VIRTIO_PCI=y
@@ -86,6 +101,7 @@ CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_CTRL=y
CONFIG_RPMSG_VIRTIO=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
@@ -94,12 +110,15 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
CONFIG_NFS_V4_2=y
CONFIG_ROOT_NFS=y
CONFIG_9P_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=m
CONFIG_CRYPTO_USER_API_HASH=y
CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_PRINTK_TIME=y
@@ -124,8 +143,6 @@ CONFIG_DEBUG_PLIST=y
CONFIG_DEBUG_SG=y
# CONFIG_RCU_TRACE is not set
CONFIG_RCU_EQS_DEBUG=y
-CONFIG_DEBUG_BLOCK_EXT_DEVT=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
-# CONFIG_SYSFS_SYSCALL is not set
diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig
index b48138e329ea..2438fa39f8ae 100644
--- a/arch/riscv/configs/nommu_k210_defconfig
+++ b/arch/riscv/configs/nommu_k210_defconfig
@@ -1,17 +1,19 @@
# CONFIG_CPU_ISOLATION is not set
-CONFIG_LOG_BUF_SHIFT=15
+CONFIG_LOG_BUF_SHIFT=13
CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_FORCE=y
+# CONFIG_RD_GZIP is not set
# CONFIG_RD_BZIP2 is not set
# CONFIG_RD_LZMA is not set
# CONFIG_RD_XZ is not set
# CONFIG_RD_LZO is not set
# CONFIG_RD_LZ4 is not set
+# CONFIG_RD_ZSTD is not set
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_SYSFS_SYSCALL is not set
# CONFIG_FHANDLE is not set
# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
# CONFIG_TIMERFD is not set
@@ -19,20 +21,20 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_AIO is not set
# CONFIG_IO_URING is not set
# CONFIG_ADVISE_SYSCALLS is not set
-# CONFIG_MEMBARRIER is not set
# CONFIG_KALLSYMS is not set
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
-# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_MMU is not set
-CONFIG_SOC_KENDRYTE=y
-CONFIG_MAXPHYSMEM_2GB=y
+CONFIG_SOC_CANAAN=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0"
CONFIG_CMDLINE_FORCE=y
+# CONFIG_SECCOMP is not set
+# CONFIG_STACKPROTECTOR is not set
+# CONFIG_GCC_PLUGINS is not set
# CONFIG_BLOCK is not set
CONFIG_BINFMT_FLAT=y
# CONFIG_COREDUMP is not set
@@ -40,23 +42,46 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_FW_LOADER is not set
# CONFIG_ALLOW_DEV_COREDUMP is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
# CONFIG_LEGACY_PTYS is not set
# CONFIG_LDISC_AUTOLOAD is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_DEVMEM is not set
+CONFIG_I2C=y
+# CONFIG_I2C_COMPAT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_SPI=y
+# CONFIG_SPI_MEM is not set
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
+# CONFIG_GPIO_CDEV_V1 is not set
+CONFIG_GPIO_DWAPB=y
+CONFIG_GPIO_SIFIVE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_USER=y
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
+# CONFIG_FILE_LOCKING is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_LSM="[]"
CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
+# CONFIG_FRAME_POINTER is not set
# CONFIG_DEBUG_MISC is not set
CONFIG_PANIC_ON_OOPS=y
# CONFIG_SCHED_DEBUG is not set
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
new file mode 100644
index 000000000000..9a133e63ae5b
--- /dev/null
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -0,0 +1,87 @@
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_FHANDLE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
+# CONFIG_IO_URING is not set
+# CONFIG_ADVISE_SYSCALLS is not set
+# CONFIG_KALLSYMS is not set
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLOB=y
+# CONFIG_MMU is not set
+CONFIG_SOC_CANAAN=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro"
+CONFIG_CMDLINE_FORCE=y
+# CONFIG_SECCOMP is not set
+# CONFIG_STACKPROTECTOR is not set
+# CONFIG_GCC_PLUGINS is not set
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+CONFIG_BINFMT_FLAT=y
+# CONFIG_COREDUMP is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+# CONFIG_BLK_DEV is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_LDISC_AUTOLOAD is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_DEVMEM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_SPI=y
+# CONFIG_SPI_MEM is not set
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
+# CONFIG_GPIO_CDEV_V1 is not set
+CONFIG_GPIO_DWAPB=y
+CONFIG_GPIO_SIFIVE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+CONFIG_MMC_SPI=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_USER=y
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
+CONFIG_EXT2_FS=y
+# CONFIG_FILE_LOCKING is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_LSM="[]"
+CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_MISC is not set
+CONFIG_PANIC_ON_OOPS=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_RCU_TRACE is not set
+# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
index cf74e179bf90..5269fbb6b4fc 100644
--- a/arch/riscv/configs/nommu_virt_defconfig
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -19,18 +19,16 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
# CONFIG_IO_URING is not set
# CONFIG_ADVISE_SYSCALLS is not set
-# CONFIG_MEMBARRIER is not set
# CONFIG_KALLSYMS is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
-# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_MMU is not set
-CONFIG_MAXPHYSMEM_2GB=y
+CONFIG_SOC_VIRT=y
CONFIG_SMP=y
CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
CONFIG_CMDLINE_FORCE=y
-# CONFIG_BLK_DEV_BSG is not set
+CONFIG_JUMP_LABEL=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
# CONFIG_EFI_PARTITION is not set
@@ -48,7 +46,6 @@ CONFIG_VIRTIO_BLK=y
# CONFIG_SERIO is not set
# CONFIG_LEGACY_PTYS is not set
# CONFIG_LDISC_AUTOLOAD is not set
-# CONFIG_DEVMEM is not set
CONFIG_SERIAL_8250=y
# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
CONFIG_SERIAL_8250_CONSOLE=y
@@ -56,16 +53,13 @@ CONFIG_SERIAL_8250_NR_UARTS=1
CONFIG_SERIAL_8250_RUNTIME_UARTS=1
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
+# CONFIG_DEVMEM is not set
# CONFIG_HWMON is not set
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
# CONFIG_VGA_CONSOLE is not set
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
-CONFIG_SIFIVE_PLIC=y
-# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT2_FS=y
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 05bbf5240569..6cd9d84d3e13 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -2,6 +2,7 @@ CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
@@ -13,10 +14,18 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
-CONFIG_BPF_SYSCALL=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_PROFILING=y
+CONFIG_SOC_SIFIVE=y
CONFIG_SOC_VIRT=y
CONFIG_ARCH_RV32I=y
CONFIG_SMP=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PM=y
+CONFIG_CPU_IDLE=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_NET=y
@@ -56,16 +65,16 @@ CONFIG_INPUT_MOUSEDEV=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
-CONFIG_HVC_RISCV_SBI=y
CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
+CONFIG_SPI=y
+CONFIG_SPI_SIFIVE=y
# CONFIG_PTP_1588_CLOCK is not set
-CONFIG_POWER_RESET=y
CONFIG_DRM=y
CONFIG_DRM_RADEON=y
CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
@@ -76,12 +85,15 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
+CONFIG_MMC=y
+CONFIG_MMC_SPI=y
CONFIG_RTC_CLASS=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_CTRL=y
CONFIG_RPMSG_VIRTIO=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
@@ -90,6 +102,7 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
@@ -120,8 +133,6 @@ CONFIG_DEBUG_PLIST=y
CONFIG_DEBUG_SG=y
# CONFIG_RCU_TRACE is not set
CONFIG_RCU_EQS_DEBUG=y
-CONFIG_DEBUG_BLOCK_EXT_DEVT=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
-# CONFIG_SYSFS_SYSCALL is not set
diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile
new file mode 100644
index 000000000000..a1055965fbee
--- /dev/null
+++ b/arch/riscv/errata/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_ERRATA_SIFIVE) += sifive/
+obj-$(CONFIG_ERRATA_THEAD) += thead/
diff --git a/arch/riscv/errata/sifive/Makefile b/arch/riscv/errata/sifive/Makefile
new file mode 100644
index 000000000000..2fde48db0619
--- /dev/null
+++ b/arch/riscv/errata/sifive/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_ERRATA_SIFIVE_CIP_453) += errata_cip_453.o
+obj-y += errata.o
diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c
new file mode 100644
index 000000000000..672f02b21ce0
--- /dev/null
+++ b/arch/riscv/errata/sifive/errata.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Sifive.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/bug.h>
+#include <asm/patch.h>
+#include <asm/alternative.h>
+#include <asm/vendorid_list.h>
+#include <asm/errata_list.h>
+
+struct errata_info_t {
+ char name[ERRATA_STRING_LENGTH_MAX];
+ bool (*check_func)(unsigned long arch_id, unsigned long impid);
+};
+
+static bool errata_cip_453_check_func(unsigned long arch_id, unsigned long impid)
+{
+ /*
+ * Affected cores:
+ * Architecture ID: 0x8000000000000007
+ * Implement ID: 0x20181004 <= impid <= 0x20191105
+ */
+ if (arch_id != 0x8000000000000007 ||
+ (impid < 0x20181004 || impid > 0x20191105))
+ return false;
+ return true;
+}
+
+static bool errata_cip_1200_check_func(unsigned long arch_id, unsigned long impid)
+{
+ /*
+ * Affected cores:
+ * Architecture ID: 0x8000000000000007 or 0x1
+ * Implement ID: mimpid[23:0] <= 0x200630 and mimpid != 0x01200626
+ */
+ if (arch_id != 0x8000000000000007 && arch_id != 0x1)
+ return false;
+ if ((impid & 0xffffff) > 0x200630 || impid == 0x1200626)
+ return false;
+ return true;
+}
+
+static struct errata_info_t errata_list[ERRATA_SIFIVE_NUMBER] = {
+ {
+ .name = "cip-453",
+ .check_func = errata_cip_453_check_func
+ },
+ {
+ .name = "cip-1200",
+ .check_func = errata_cip_1200_check_func
+ },
+};
+
+static u32 __init_or_module sifive_errata_probe(unsigned long archid,
+ unsigned long impid)
+{
+ int idx;
+ u32 cpu_req_errata = 0;
+
+ for (idx = 0; idx < ERRATA_SIFIVE_NUMBER; idx++)
+ if (errata_list[idx].check_func(archid, impid))
+ cpu_req_errata |= (1U << idx);
+
+ return cpu_req_errata;
+}
+
+static void __init_or_module warn_miss_errata(u32 miss_errata)
+{
+ int i;
+
+ pr_warn("----------------------------------------------------------------\n");
+ pr_warn("WARNING: Missing the following errata may cause potential issues\n");
+ for (i = 0; i < ERRATA_SIFIVE_NUMBER; i++)
+ if (miss_errata & 0x1 << i)
+ pr_warn("\tSiFive Errata[%d]:%s\n", i, errata_list[i].name);
+ pr_warn("Please enable the corresponding Kconfig to apply them\n");
+ pr_warn("----------------------------------------------------------------\n");
+}
+
+void __init_or_module sifive_errata_patch_func(struct alt_entry *begin,
+ struct alt_entry *end,
+ unsigned long archid,
+ unsigned long impid,
+ unsigned int stage)
+{
+ struct alt_entry *alt;
+ u32 cpu_req_errata;
+ u32 cpu_apply_errata = 0;
+ u32 tmp;
+
+ if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
+ return;
+
+ cpu_req_errata = sifive_errata_probe(archid, impid);
+
+ for (alt = begin; alt < end; alt++) {
+ if (alt->vendor_id != SIFIVE_VENDOR_ID)
+ continue;
+ if (alt->errata_id >= ERRATA_SIFIVE_NUMBER) {
+ WARN(1, "This errata id:%d is not in kernel errata list", alt->errata_id);
+ continue;
+ }
+
+ tmp = (1U << alt->errata_id);
+ if (cpu_req_errata & tmp) {
+ patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
+ cpu_apply_errata |= tmp;
+ }
+ }
+ if (cpu_apply_errata != cpu_req_errata)
+ warn_miss_errata(cpu_req_errata - cpu_apply_errata);
+}
diff --git a/arch/riscv/errata/sifive/errata_cip_453.S b/arch/riscv/errata/sifive/errata_cip_453.S
new file mode 100644
index 000000000000..f1b9623fe1de
--- /dev/null
+++ b/arch/riscv/errata/sifive/errata_cip_453.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/alternative.h>
+
+.macro ADD_SIGN_EXT pt_reg badaddr tmp_reg
+ REG_L \badaddr, PT_BADADDR(\pt_reg)
+ li \tmp_reg,1
+ slli \tmp_reg,\tmp_reg,0x26
+ and \tmp_reg,\tmp_reg,\badaddr
+ beqz \tmp_reg, 1f
+ li \tmp_reg,-1
+ slli \tmp_reg,\tmp_reg,0x27
+ or \badaddr,\tmp_reg,\badaddr
+ REG_S \badaddr, PT_BADADDR(\pt_reg)
+1:
+.endm
+
+ENTRY(sifive_cip_453_page_fault_trp)
+ ADD_SIGN_EXT a0, t0, t1
+#ifdef CONFIG_MMU
+ la t0, do_page_fault
+#else
+ la t0, do_trap_unknown
+#endif
+ jr t0
+END(sifive_cip_453_page_fault_trp)
+
+ENTRY(sifive_cip_453_insn_fault_trp)
+ ADD_SIGN_EXT a0, t0, t1
+ la t0, do_trap_insn_fault
+ jr t0
+END(sifive_cip_453_insn_fault_trp)
diff --git a/arch/riscv/errata/thead/Makefile b/arch/riscv/errata/thead/Makefile
new file mode 100644
index 000000000000..137e700d9d3f
--- /dev/null
+++ b/arch/riscv/errata/thead/Makefile
@@ -0,0 +1,11 @@
+ifdef CONFIG_RISCV_ALTERNATIVE_EARLY
+CFLAGS_errata.o := -mcmodel=medany
+ifdef CONFIG_FTRACE
+CFLAGS_REMOVE_errata.o = $(CC_FLAGS_FTRACE)
+endif
+ifdef CONFIG_KASAN
+KASAN_SANITIZE_errata.o := n
+endif
+endif
+
+obj-y += errata.o
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
new file mode 100644
index 000000000000..e5d75270b99c
--- /dev/null
+++ b/arch/riscv/errata/thead/errata.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Heiko Stuebner <heiko@sntech.de>
+ */
+
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <asm/alternative.h>
+#include <asm/cacheflush.h>
+#include <asm/errata_list.h>
+#include <asm/patch.h>
+#include <asm/vendorid_list.h>
+
+struct errata_info {
+ char name[ERRATA_STRING_LENGTH_MAX];
+ bool (*check_func)(unsigned long arch_id, unsigned long impid);
+ unsigned int stage;
+};
+
+static bool errata_mt_check_func(unsigned long arch_id, unsigned long impid)
+{
+ if (arch_id != 0 || impid != 0)
+ return false;
+ return true;
+}
+
+static const struct errata_info errata_list[ERRATA_THEAD_NUMBER] = {
+ {
+ .name = "memory-types",
+ .stage = RISCV_ALTERNATIVES_EARLY_BOOT,
+ .check_func = errata_mt_check_func
+ },
+};
+
+static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid)
+{
+ const struct errata_info *info;
+ u32 cpu_req_errata = 0;
+ int idx;
+
+ for (idx = 0; idx < ERRATA_THEAD_NUMBER; idx++) {
+ info = &errata_list[idx];
+
+ if ((stage == RISCV_ALTERNATIVES_MODULE ||
+ info->stage == stage) && info->check_func(archid, impid))
+ cpu_req_errata |= (1U << idx);
+ }
+
+ return cpu_req_errata;
+}
+
+void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
+ unsigned long archid, unsigned long impid,
+ unsigned int stage)
+{
+ struct alt_entry *alt;
+ u32 cpu_req_errata = thead_errata_probe(stage, archid, impid);
+ u32 tmp;
+
+ for (alt = begin; alt < end; alt++) {
+ if (alt->vendor_id != THEAD_VENDOR_ID)
+ continue;
+ if (alt->errata_id >= ERRATA_THEAD_NUMBER)
+ continue;
+
+ tmp = (1U << alt->errata_id);
+ if (cpu_req_errata & tmp) {
+ /* On vm-alternatives, the mmu isn't running yet */
+ if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
+ memcpy((void *)__pa_symbol(alt->old_ptr),
+ (void *)__pa_symbol(alt->alt_ptr), alt->alt_len);
+ else
+ patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
+ }
+ }
+
+ if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
+ local_flush_icache_all();
+}
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 3d9410bb4de0..504f8b7e72d4 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -1,7 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-generic-y += extable.h
+generic-y += early_ioremap.h
generic-y += flat.h
generic-y += kvm_para.h
-generic-y += local64.h
+generic-y += parport.h
+generic-y += spinlock.h
+generic-y += spinlock_types.h
+generic-y += qrwlock.h
+generic-y += qrwlock_types.h
generic-y += user.h
generic-y += vmlinux.lds.h
diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
new file mode 100644
index 000000000000..ec2f3f1b836f
--- /dev/null
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_MACROS_H
+#define __ASM_ALTERNATIVE_MACROS_H
+
+#ifdef CONFIG_RISCV_ALTERNATIVE
+
+#ifdef __ASSEMBLY__
+
+.macro ALT_ENTRY oldptr newptr vendor_id errata_id new_len
+ RISCV_PTR \oldptr
+ RISCV_PTR \newptr
+ REG_ASM \vendor_id
+ REG_ASM \new_len
+ .word \errata_id
+.endm
+
+.macro ALT_NEW_CONTENT vendor_id, errata_id, enable = 1, new_c : vararg
+ .if \enable
+ .pushsection .alternative, "a"
+ ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f
+ .popsection
+ .subsection 1
+888 :
+ .option push
+ .option norvc
+ .option norelax
+ \new_c
+ .option pop
+889 :
+ .org . - (889b - 888b) + (887b - 886b)
+ .org . - (887b - 886b) + (889b - 888b)
+ .previous
+ .endif
+.endm
+
+.macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable
+886 :
+ .option push
+ .option norvc
+ .option norelax
+ \old_c
+ .option pop
+887 :
+ ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c
+.endm
+
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+ __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)
+
+.macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
+ new_c_2, vendor_id_2, errata_id_2, enable_2
+886 :
+ .option push
+ .option norvc
+ .option norelax
+ \old_c
+ .option pop
+887 :
+ ALT_NEW_CONTENT \vendor_id_1, \errata_id_1, \enable_1, \new_c_1
+ ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2
+.endm
+
+#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \
+ CONFIG_k_1, \
+ new_c_2, vendor_id_2, errata_id_2, \
+ CONFIG_k_2) \
+ __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, \
+ IS_ENABLED(CONFIG_k_1), \
+ new_c_2, vendor_id_2, errata_id_2, \
+ IS_ENABLED(CONFIG_k_2)
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/asm.h>
+#include <linux/stringify.h>
+
+#define ALT_ENTRY(oldptr, newptr, vendor_id, errata_id, newlen) \
+ RISCV_PTR " " oldptr "\n" \
+ RISCV_PTR " " newptr "\n" \
+ REG_ASM " " vendor_id "\n" \
+ REG_ASM " " newlen "\n" \
+ ".word " errata_id "\n"
+
+#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \
+ ".if " __stringify(enable) " == 1\n" \
+ ".pushsection .alternative, \"a\"\n" \
+ ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \
+ ".popsection\n" \
+ ".subsection 1\n" \
+ "888 :\n" \
+ ".option push\n" \
+ ".option norvc\n" \
+ ".option norelax\n" \
+ new_c "\n" \
+ ".option pop\n" \
+ "889 :\n" \
+ ".org . - (887b - 886b) + (889b - 888b)\n" \
+ ".org . - (889b - 888b) + (887b - 886b)\n" \
+ ".previous\n" \
+ ".endif\n"
+
+#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \
+ "886 :\n" \
+ ".option push\n" \
+ ".option norvc\n" \
+ ".option norelax\n" \
+ old_c "\n" \
+ ".option pop\n" \
+ "887 :\n" \
+ ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c)
+
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+ __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k))
+
+#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \
+ enable_1, \
+ new_c_2, vendor_id_2, errata_id_2, \
+ enable_2) \
+ "886 :\n" \
+ ".option push\n" \
+ ".option norvc\n" \
+ ".option norelax\n" \
+ old_c "\n" \
+ ".option pop\n" \
+ "887 :\n" \
+ ALT_NEW_CONTENT(vendor_id_1, errata_id_1, enable_1, new_c_1) \
+ ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2)
+
+#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \
+ CONFIG_k_1, \
+ new_c_2, vendor_id_2, errata_id_2, \
+ CONFIG_k_2) \
+ __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \
+ IS_ENABLED(CONFIG_k_1), \
+ new_c_2, vendor_id_2, errata_id_2, \
+ IS_ENABLED(CONFIG_k_2))
+
+#endif /* __ASSEMBLY__ */
+
+#else /* CONFIG_RISCV_ALTERNATIVE */
+#ifdef __ASSEMBLY__
+
+.macro __ALTERNATIVE_CFG old_c
+ \old_c
+.endm
+
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+ __ALTERNATIVE_CFG old_c
+
+#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \
+ CONFIG_k_1, \
+ new_c_2, vendor_id_2, errata_id_2, \
+ CONFIG_k_2) \
+ __ALTERNATIVE_CFG old_c
+
+#else /* !__ASSEMBLY__ */
+
+#define __ALTERNATIVE_CFG(old_c) \
+ old_c "\n"
+
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+ __ALTERNATIVE_CFG(old_c)
+
+#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \
+ CONFIG_k_1, \
+ new_c_2, vendor_id_2, errata_id_2, \
+ CONFIG_k_2) \
+ __ALTERNATIVE_CFG(old_c)
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_RISCV_ALTERNATIVE */
+
+/*
+ * Usage:
+ * ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k)
+ * in the assembly code. Otherwise,
+ * asm(ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k));
+ *
+ * old_content: The old content which is probably replaced with new content.
+ * new_content: The new content.
+ * vendor_id: The CPU vendor ID.
+ * errata_id: The errata ID.
+ * CONFIG_k: The Kconfig of this errata. When Kconfig is disabled, the old
+ * content will alwyas be executed.
+ */
+#define ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k) \
+ _ALTERNATIVE_CFG(old_content, new_content, vendor_id, errata_id, CONFIG_k)
+
+/*
+ * A vendor wants to replace an old_content, but another vendor has used
+ * ALTERNATIVE() to patch its customized content at the same location. In
+ * this case, this vendor can create a new macro ALTERNATIVE_2() based
+ * on the following sample code and then replace ALTERNATIVE() with
+ * ALTERNATIVE_2() to append its customized content.
+ */
+#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, \
+ errata_id_1, CONFIG_k_1, \
+ new_content_2, vendor_id_2, \
+ errata_id_2, CONFIG_k_2) \
+ _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, \
+ errata_id_1, CONFIG_k_1, \
+ new_content_2, vendor_id_2, \
+ errata_id_2, CONFIG_k_2)
+
+#endif
diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
new file mode 100644
index 000000000000..6511dd73e812
--- /dev/null
+++ b/arch/riscv/include/asm/alternative.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Sifive.
+ */
+
+#ifndef __ASM_ALTERNATIVE_H
+#define __ASM_ALTERNATIVE_H
+
+#define ERRATA_STRING_LENGTH_MAX 32
+
+#include <asm/alternative-macros.h>
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_RISCV_ALTERNATIVE
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/hwcap.h>
+
+#define RISCV_ALTERNATIVES_BOOT 0 /* alternatives applied during regular boot */
+#define RISCV_ALTERNATIVES_MODULE 1 /* alternatives applied during module-init */
+#define RISCV_ALTERNATIVES_EARLY_BOOT 2 /* alternatives applied before mmu start */
+
+void __init apply_boot_alternatives(void);
+void __init apply_early_boot_alternatives(void);
+void apply_module_alternatives(void *start, size_t length);
+
+struct alt_entry {
+ void *old_ptr; /* address of original instruciton or data */
+ void *alt_ptr; /* address of replacement instruction or data */
+ unsigned long vendor_id; /* cpu vendor id */
+ unsigned long alt_len; /* The replacement size */
+ unsigned int errata_id; /* The errata id */
+} __packed;
+
+struct errata_checkfunc_id {
+ unsigned long vendor_id;
+ bool (*func)(struct alt_entry *alt);
+};
+
+void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
+ unsigned long archid, unsigned long impid,
+ unsigned int stage);
+void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
+ unsigned long archid, unsigned long impid,
+ unsigned int stage);
+
+void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end,
+ unsigned int stage);
+
+#else /* CONFIG_RISCV_ALTERNATIVE */
+
+static inline void apply_boot_alternatives(void) { }
+static inline void apply_early_boot_alternatives(void) { }
+static inline void apply_module_alternatives(void *start, size_t length) { }
+
+#endif /* CONFIG_RISCV_ALTERNATIVE */
+
+#endif
+#endif
diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h
new file mode 100644
index 000000000000..14be0673f5b5
--- /dev/null
+++ b/arch/riscv/include/asm/asm-extable.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_ASM_EXTABLE_H
+#define __ASM_ASM_EXTABLE_H
+
+#define EX_TYPE_NONE 0
+#define EX_TYPE_FIXUP 1
+#define EX_TYPE_BPF 2
+#define EX_TYPE_UACCESS_ERR_ZERO 3
+
+#ifdef __ASSEMBLY__
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ .pushsection __ex_table, "a"; \
+ .balign 4; \
+ .long ((insn) - .); \
+ .long ((fixup) - .); \
+ .short (type); \
+ .short (data); \
+ .popsection;
+
+ .macro _asm_extable, insn, fixup
+ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
+ .endm
+
+#else /* __ASSEMBLY__ */
+
+#include <linux/bits.h>
+#include <linux/stringify.h>
+#include <asm/gpr-num.h>
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ ".pushsection __ex_table, \"a\"\n" \
+ ".balign 4\n" \
+ ".long ((" insn ") - .)\n" \
+ ".long ((" fixup ") - .)\n" \
+ ".short (" type ")\n" \
+ ".short (" data ")\n" \
+ ".popsection\n"
+
+#define _ASM_EXTABLE(insn, fixup) \
+ __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0")
+
+#define EX_DATA_REG_ERR_SHIFT 0
+#define EX_DATA_REG_ERR GENMASK(4, 0)
+#define EX_DATA_REG_ZERO_SHIFT 5
+#define EX_DATA_REG_ZERO GENMASK(9, 5)
+
+#define EX_DATA_REG(reg, gpr) \
+ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_UACCESS_ERR_ZERO), \
+ "(" \
+ EX_DATA_REG(ERR, err) " | " \
+ EX_DATA_REG(ZERO, zero) \
+ ")")
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index 27e005fca584..ef386fcf3939 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -9,4 +9,23 @@ long long __lshrti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __ashlti3(long long a, int b);
+
+#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs)
+
+DECLARE_DO_ERROR_INFO(do_trap_unknown);
+DECLARE_DO_ERROR_INFO(do_trap_insn_misaligned);
+DECLARE_DO_ERROR_INFO(do_trap_insn_fault);
+DECLARE_DO_ERROR_INFO(do_trap_insn_illegal);
+DECLARE_DO_ERROR_INFO(do_trap_load_fault);
+DECLARE_DO_ERROR_INFO(do_trap_load_misaligned);
+DECLARE_DO_ERROR_INFO(do_trap_store_misaligned);
+DECLARE_DO_ERROR_INFO(do_trap_store_fault);
+DECLARE_DO_ERROR_INFO(do_trap_ecall_u);
+DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
+DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
+DECLARE_DO_ERROR_INFO(do_trap_break);
+
+asmlinkage unsigned long get_overflow_stack(void);
+asmlinkage void handle_bad_stack(struct pt_regs *regs);
+
#endif /* _ASM_RISCV_PROTOTYPES_H */
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 9c992a88d858..618d7c5af1a2 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -23,6 +23,7 @@
#define REG_L __REG_SEL(ld, lw)
#define REG_S __REG_SEL(sd, sw)
#define REG_SC __REG_SEL(sc.d, sc.w)
+#define REG_ASM __REG_SEL(.dword, .word)
#define SZREG __REG_SEL(8, 4)
#define LGREG __REG_SEL(3, 2)
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 96f95c9ebd97..0dfe9d857a76 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -19,30 +19,28 @@
#include <asm/cmpxchg.h>
#include <asm/barrier.h>
-#define ATOMIC_INIT(i) { (i) }
-
#define __atomic_acquire_fence() \
__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
#define __atomic_release_fence() \
__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");
-static __always_inline int atomic_read(const atomic_t *v)
+static __always_inline int arch_atomic_read(const atomic_t *v)
{
return READ_ONCE(v->counter);
}
-static __always_inline void atomic_set(atomic_t *v, int i)
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
WRITE_ONCE(v->counter, i);
}
#ifndef CONFIG_GENERIC_ATOMIC64
#define ATOMIC64_INIT(i) { (i) }
-static __always_inline s64 atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return READ_ONCE(v->counter);
}
-static __always_inline void atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
WRITE_ONCE(v->counter, i);
}
@@ -55,7 +53,7 @@ static __always_inline void atomic64_set(atomic64_t *v, s64 i)
*/
#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline \
-void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
+void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
{ \
__asm__ __volatile__ ( \
" amo" #asm_op "." #asm_type " zero, %1, %0" \
@@ -89,7 +87,7 @@ ATOMIC_OPS(xor, xor, i)
*/
#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline \
-c_type atomic##prefix##_fetch_##op##_relaxed(c_type i, \
+c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
atomic##prefix##_t *v) \
{ \
register c_type ret; \
@@ -101,7 +99,7 @@ c_type atomic##prefix##_fetch_##op##_relaxed(c_type i, \
return ret; \
} \
static __always_inline \
-c_type atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
+c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
{ \
register c_type ret; \
__asm__ __volatile__ ( \
@@ -114,15 +112,15 @@ c_type atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix) \
static __always_inline \
-c_type atomic##prefix##_##op##_return_relaxed(c_type i, \
+c_type arch_atomic##prefix##_##op##_return_relaxed(c_type i, \
atomic##prefix##_t *v) \
{ \
- return atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \
+ return arch_atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \
} \
static __always_inline \
-c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
+c_type arch_atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
{ \
- return atomic##prefix##_fetch_##op(i, v) c_op I; \
+ return arch_atomic##prefix##_fetch_##op(i, v) c_op I; \
}
#ifdef CONFIG_GENERIC_ATOMIC64
@@ -140,26 +138,26 @@ c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
ATOMIC_OPS(add, add, +, i)
ATOMIC_OPS(sub, add, +, -i)
-#define atomic_add_return_relaxed atomic_add_return_relaxed
-#define atomic_sub_return_relaxed atomic_sub_return_relaxed
-#define atomic_add_return atomic_add_return
-#define atomic_sub_return atomic_sub_return
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
-#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
-#define atomic_fetch_add atomic_fetch_add
-#define atomic_fetch_sub atomic_fetch_sub
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
#ifndef CONFIG_GENERIC_ATOMIC64
-#define atomic64_add_return_relaxed atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
-#define atomic64_add_return atomic64_add_return
-#define atomic64_sub_return atomic64_sub_return
-
-#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
-#define atomic64_fetch_add atomic64_fetch_add
-#define atomic64_fetch_sub atomic64_fetch_sub
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_add_return arch_atomic64_add_return
+#define arch_atomic64_sub_return arch_atomic64_sub_return
+
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
#endif
#undef ATOMIC_OPS
@@ -177,20 +175,20 @@ ATOMIC_OPS(and, and, i)
ATOMIC_OPS( or, or, i)
ATOMIC_OPS(xor, xor, i)
-#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
-#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
-#define atomic_fetch_and atomic_fetch_and
-#define atomic_fetch_or atomic_fetch_or
-#define atomic_fetch_xor atomic_fetch_xor
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
#ifndef CONFIG_GENERIC_ATOMIC64
-#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
-#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
-#define atomic64_fetch_and atomic64_fetch_and
-#define atomic64_fetch_or atomic64_fetch_or
-#define atomic64_fetch_xor atomic64_fetch_xor
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
#endif
#undef ATOMIC_OPS
@@ -199,7 +197,7 @@ ATOMIC_OPS(xor, xor, i)
#undef ATOMIC_OP_RETURN
/* This is required to provide a full barrier on success. */
-static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int prev, rc;
@@ -216,10 +214,10 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
: "memory");
return prev;
}
-#define atomic_fetch_add_unless atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
#ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 prev;
long rc;
@@ -237,7 +235,7 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
: "memory");
return prev;
}
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif
/*
@@ -246,45 +244,45 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
*/
#define ATOMIC_OP(c_t, prefix, size) \
static __always_inline \
-c_t atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \
+c_t arch_atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \
{ \
return __xchg_relaxed(&(v->counter), n, size); \
} \
static __always_inline \
-c_t atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \
+c_t arch_atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \
{ \
return __xchg_acquire(&(v->counter), n, size); \
} \
static __always_inline \
-c_t atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \
+c_t arch_atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \
{ \
return __xchg_release(&(v->counter), n, size); \
} \
static __always_inline \
-c_t atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \
+c_t arch_atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \
{ \
return __xchg(&(v->counter), n, size); \
} \
static __always_inline \
-c_t atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \
+c_t arch_atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \
c_t o, c_t n) \
{ \
return __cmpxchg_relaxed(&(v->counter), o, n, size); \
} \
static __always_inline \
-c_t atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \
+c_t arch_atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \
c_t o, c_t n) \
{ \
return __cmpxchg_acquire(&(v->counter), o, n, size); \
} \
static __always_inline \
-c_t atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \
+c_t arch_atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \
c_t o, c_t n) \
{ \
return __cmpxchg_release(&(v->counter), o, n, size); \
} \
static __always_inline \
-c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
+c_t arch_atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
{ \
return __cmpxchg(&(v->counter), o, n, size); \
}
@@ -300,59 +298,141 @@ c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
ATOMIC_OPS()
-#define atomic_xchg_relaxed atomic_xchg_relaxed
-#define atomic_xchg_acquire atomic_xchg_acquire
-#define atomic_xchg_release atomic_xchg_release
-#define atomic_xchg atomic_xchg
-#define atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed
-#define atomic_cmpxchg_acquire atomic_cmpxchg_acquire
-#define atomic_cmpxchg_release atomic_cmpxchg_release
-#define atomic_cmpxchg atomic_cmpxchg
+#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed
+#define arch_atomic_xchg_acquire arch_atomic_xchg_acquire
+#define arch_atomic_xchg_release arch_atomic_xchg_release
+#define arch_atomic_xchg arch_atomic_xchg
+#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire
+#define arch_atomic_cmpxchg_release arch_atomic_cmpxchg_release
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
#undef ATOMIC_OPS
#undef ATOMIC_OP
-static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
+static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
+{
+ int prev, rc;
+
+ __asm__ __volatile__ (
+ "0: lr.w %[p], %[c]\n"
+ " bltz %[p], 1f\n"
+ " addi %[rc], %[p], 1\n"
+ " sc.w.rl %[rc], %[rc], %[c]\n"
+ " bnez %[rc], 0b\n"
+ " fence rw, rw\n"
+ "1:\n"
+ : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
+ :
+ : "memory");
+ return !(prev < 0);
+}
+
+#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
+
+static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v)
+{
+ int prev, rc;
+
+ __asm__ __volatile__ (
+ "0: lr.w %[p], %[c]\n"
+ " bgtz %[p], 1f\n"
+ " addi %[rc], %[p], -1\n"
+ " sc.w.rl %[rc], %[rc], %[c]\n"
+ " bnez %[rc], 0b\n"
+ " fence rw, rw\n"
+ "1:\n"
+ : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
+ :
+ : "memory");
+ return !(prev > 0);
+}
+
+#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
+
+static __always_inline int arch_atomic_dec_if_positive(atomic_t *v)
{
int prev, rc;
__asm__ __volatile__ (
"0: lr.w %[p], %[c]\n"
- " sub %[rc], %[p], %[o]\n"
+ " addi %[rc], %[p], -1\n"
" bltz %[rc], 1f\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
" fence rw, rw\n"
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- : [o]"r" (offset)
+ :
: "memory");
- return prev - offset;
+ return prev - 1;
}
-#define atomic_dec_if_positive(v) atomic_sub_if_positive(v, 1)
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
#ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline s64 atomic64_sub_if_positive(atomic64_t *v, s64 offset)
+static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v)
+{
+ s64 prev;
+ long rc;
+
+ __asm__ __volatile__ (
+ "0: lr.d %[p], %[c]\n"
+ " bltz %[p], 1f\n"
+ " addi %[rc], %[p], 1\n"
+ " sc.d.rl %[rc], %[rc], %[c]\n"
+ " bnez %[rc], 0b\n"
+ " fence rw, rw\n"
+ "1:\n"
+ : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
+ :
+ : "memory");
+ return !(prev < 0);
+}
+
+#define arch_atomic64_inc_unless_negative arch_atomic64_inc_unless_negative
+
+static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v)
+{
+ s64 prev;
+ long rc;
+
+ __asm__ __volatile__ (
+ "0: lr.d %[p], %[c]\n"
+ " bgtz %[p], 1f\n"
+ " addi %[rc], %[p], -1\n"
+ " sc.d.rl %[rc], %[rc], %[c]\n"
+ " bnez %[rc], 0b\n"
+ " fence rw, rw\n"
+ "1:\n"
+ : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
+ :
+ : "memory");
+ return !(prev > 0);
+}
+
+#define arch_atomic64_dec_unless_positive arch_atomic64_dec_unless_positive
+
+static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 prev;
long rc;
__asm__ __volatile__ (
"0: lr.d %[p], %[c]\n"
- " sub %[rc], %[p], %[o]\n"
+ " addi %[rc], %[p], -1\n"
" bltz %[rc], 1f\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
" fence rw, rw\n"
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- : [o]"r" (offset)
+ :
: "memory");
- return prev - offset;
+ return prev - 1;
}
-#define atomic64_dec_if_positive(v) atomic64_sub_if_positive(v, 1)
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
#endif
#endif /* _ASM_RISCV_ATOMIC_H */
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 3f1737f301cc..d0e24aaa2aa0 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -58,8 +58,16 @@ do { \
* The AQ/RL pair provides a RCpc critical section, but there's not really any
* way we can take advantage of that here because the ordering is only enforced
* on that one lock. Thus, we're just doing a full fence.
+ *
+ * Since we allow writeX to be called from preemptive regions we need at least
+ * an "o" in the predecessor set to ensure device writes are visible before the
+ * task is marked as available for scheduling on a new hart. While I don't see
+ * any concrete reason we need a full IO fence, it seems safer to just upgrade
+ * this in order to avoid any IO crossing a scheduling boundary. In both
+ * instances the scheduler pairs this with an mb(), so nothing is necessary on
+ * the new hart.
*/
-#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw)
+#define smp_mb__after_spinlock() RISCV_FENCE(iorw,iorw)
#include <asm-generic/barrier.h>
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 396a3303c537..3540b690944b 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -20,7 +20,6 @@
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/ffs.h>
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index d6f1ec08d97b..1aaea81fb141 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -30,8 +30,8 @@
typedef u32 bug_insn_t;
#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b"
-#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b"
+#define __BUG_ENTRY_ADDR RISCV_INT " 1b - ."
+#define __BUG_ENTRY_FILE RISCV_INT " %0 - ."
#else
#define __BUG_ENTRY_ADDR RISCV_PTR " 1b"
#define __BUG_ENTRY_FILE RISCV_PTR " %0"
@@ -85,6 +85,7 @@ do { \
struct pt_regs;
struct task_struct;
+void __show_regs(struct pt_regs *regs);
void die(struct pt_regs *regs, const char *str);
void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
diff --git a/arch/riscv/include/asm/cacheinfo.h b/arch/riscv/include/asm/cacheinfo.h
index 5d9662e9aba8..d1a365215ec0 100644
--- a/arch/riscv/include/asm/cacheinfo.h
+++ b/arch/riscv/include/asm/cacheinfo.h
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 SiFive
+ */
#ifndef _ASM_RISCV_CACHEINFO_H
#define _ASM_RISCV_CACHEINFO_H
@@ -11,5 +14,7 @@ struct riscv_cacheinfo_ops {
};
void riscv_set_cacheinfo_ops(struct riscv_cacheinfo_ops *ops);
+uintptr_t get_cache_size(u32 level, enum cache_type type);
+uintptr_t get_cache_geometry(u32 level, enum cache_type type);
#endif /* _ASM_RISCV_CACHEINFO_H */
diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h
index a279b17a6aad..0789fd37b40a 100644
--- a/arch/riscv/include/asm/clint.h
+++ b/arch/riscv/include/asm/clint.h
@@ -1,39 +1,26 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Google, Inc
+ */
+
#ifndef _ASM_RISCV_CLINT_H
-#define _ASM_RISCV_CLINT_H 1
+#define _ASM_RISCV_CLINT_H
-#include <linux/io.h>
-#include <linux/smp.h>
+#include <linux/types.h>
+#include <asm/mmio.h>
#ifdef CONFIG_RISCV_M_MODE
-extern u32 __iomem *clint_ipi_base;
-
-void clint_init_boot_cpu(void);
-
-static inline void clint_send_ipi_single(unsigned long hartid)
-{
- writel(1, clint_ipi_base + hartid);
-}
-
-static inline void clint_send_ipi_mask(const struct cpumask *mask)
-{
- int cpu;
-
- for_each_cpu(cpu, mask)
- clint_send_ipi_single(cpuid_to_hartid_map(cpu));
-}
-
-static inline void clint_clear_ipi(unsigned long hartid)
-{
- writel(0, clint_ipi_base + hartid);
-}
-#else /* CONFIG_RISCV_M_MODE */
-#define clint_init_boot_cpu() do { } while (0)
-
-/* stubs to for code is only reachable under IS_ENABLED(CONFIG_RISCV_M_MODE): */
-void clint_send_ipi_single(unsigned long hartid);
-void clint_send_ipi_mask(const struct cpumask *hartid_mask);
-void clint_clear_ipi(unsigned long hartid);
-#endif /* CONFIG_RISCV_M_MODE */
-
-#endif /* _ASM_RISCV_CLINT_H */
+/*
+ * This lives in the CLINT driver, but is accessed directly by timex.h to avoid
+ * any overhead when accessing the MMIO timer.
+ *
+ * The ISA defines mtime as a 64-bit memory-mapped register that increments at
+ * a constant frequency, but it doesn't define some other constraints we depend
+ * on (most notably ordering constraints, but also some simpler stuff like the
+ * memory layout). Thus, this is called "clint_time_val" instead of something
+ * like "riscv_mtime", to signify that these non-ISA assumptions must hold.
+ */
+extern u64 __iomem *clint_time_val;
+#endif
+
+#endif
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index d969bab4a26b..12debce235e5 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -37,7 +37,7 @@
__ret; \
})
-#define xchg_relaxed(ptr, x) \
+#define arch_xchg_relaxed(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_relaxed((ptr), \
@@ -72,7 +72,7 @@
__ret; \
})
-#define xchg_acquire(ptr, x) \
+#define arch_xchg_acquire(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_acquire((ptr), \
@@ -107,7 +107,7 @@
__ret; \
})
-#define xchg_release(ptr, x) \
+#define arch_xchg_release(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_release((ptr), \
@@ -140,7 +140,7 @@
__ret; \
})
-#define xchg(ptr, x) \
+#define arch_xchg(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg((ptr), _x_, sizeof(*(ptr))); \
@@ -149,13 +149,13 @@
#define xchg32(ptr, x) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
- xchg((ptr), (x)); \
+ arch_xchg((ptr), (x)); \
})
#define xchg64(ptr, x) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- xchg((ptr), (x)); \
+ arch_xchg((ptr), (x)); \
})
/*
@@ -179,7 +179,7 @@
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
+ : "rJ" ((long)__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
@@ -199,7 +199,7 @@
__ret; \
})
-#define cmpxchg_relaxed(ptr, o, n) \
+#define arch_cmpxchg_relaxed(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -224,7 +224,7 @@
RISCV_ACQUIRE_BARRIER \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
+ : "rJ" ((long)__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
@@ -245,7 +245,7 @@
__ret; \
})
-#define cmpxchg_acquire(ptr, o, n) \
+#define arch_cmpxchg_acquire(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -270,7 +270,7 @@
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
+ : "rJ" ((long)__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
@@ -291,7 +291,7 @@
__ret; \
})
-#define cmpxchg_release(ptr, o, n) \
+#define arch_cmpxchg_release(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -316,7 +316,7 @@
" fence rw, rw\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
+ : "rJ" ((long)__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
@@ -337,7 +337,7 @@
__ret; \
})
-#define cmpxchg(ptr, o, n) \
+#define arch_cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -345,31 +345,19 @@
_o_, _n_, sizeof(*(ptr))); \
})
-#define cmpxchg_local(ptr, o, n) \
+#define arch_cmpxchg_local(ptr, o, n) \
(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
-#define cmpxchg32(ptr, o, n) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
- cmpxchg((ptr), (o), (n)); \
-})
-
-#define cmpxchg32_local(ptr, o, n) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
- cmpxchg_relaxed((ptr), (o), (n)) \
-})
-
-#define cmpxchg64(ptr, o, n) \
+#define arch_cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg((ptr), (o), (n)); \
+ arch_cmpxchg((ptr), (o), (n)); \
})
-#define cmpxchg64_local(ptr, o, n) \
+#define arch_cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_relaxed((ptr), (o), (n)); \
+ arch_cmpxchg_relaxed((ptr), (o), (n)); \
})
#endif /* _ASM_RISCV_CMPXCHG_H */
diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h
new file mode 100644
index 000000000000..2ac955b51148
--- /dev/null
+++ b/arch/riscv/include/asm/compat.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_COMPAT_H
+#define __ASM_COMPAT_H
+
+#define COMPAT_UTS_MACHINE "riscv\0\0"
+
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <asm-generic/compat.h>
+
+static inline int is_compat_task(void)
+{
+ return test_thread_flag(TIF_32BIT);
+}
+
+struct compat_user_regs_struct {
+ compat_ulong_t pc;
+ compat_ulong_t ra;
+ compat_ulong_t sp;
+ compat_ulong_t gp;
+ compat_ulong_t tp;
+ compat_ulong_t t0;
+ compat_ulong_t t1;
+ compat_ulong_t t2;
+ compat_ulong_t s0;
+ compat_ulong_t s1;
+ compat_ulong_t a0;
+ compat_ulong_t a1;
+ compat_ulong_t a2;
+ compat_ulong_t a3;
+ compat_ulong_t a4;
+ compat_ulong_t a5;
+ compat_ulong_t a6;
+ compat_ulong_t a7;
+ compat_ulong_t s2;
+ compat_ulong_t s3;
+ compat_ulong_t s4;
+ compat_ulong_t s5;
+ compat_ulong_t s6;
+ compat_ulong_t s7;
+ compat_ulong_t s8;
+ compat_ulong_t s9;
+ compat_ulong_t s10;
+ compat_ulong_t s11;
+ compat_ulong_t t3;
+ compat_ulong_t t4;
+ compat_ulong_t t5;
+ compat_ulong_t t6;
+};
+
+static inline void regs_to_cregs(struct compat_user_regs_struct *cregs,
+ struct pt_regs *regs)
+{
+ cregs->pc = (compat_ulong_t) regs->epc;
+ cregs->ra = (compat_ulong_t) regs->ra;
+ cregs->sp = (compat_ulong_t) regs->sp;
+ cregs->gp = (compat_ulong_t) regs->gp;
+ cregs->tp = (compat_ulong_t) regs->tp;
+ cregs->t0 = (compat_ulong_t) regs->t0;
+ cregs->t1 = (compat_ulong_t) regs->t1;
+ cregs->t2 = (compat_ulong_t) regs->t2;
+ cregs->s0 = (compat_ulong_t) regs->s0;
+ cregs->s1 = (compat_ulong_t) regs->s1;
+ cregs->a0 = (compat_ulong_t) regs->a0;
+ cregs->a1 = (compat_ulong_t) regs->a1;
+ cregs->a2 = (compat_ulong_t) regs->a2;
+ cregs->a3 = (compat_ulong_t) regs->a3;
+ cregs->a4 = (compat_ulong_t) regs->a4;
+ cregs->a5 = (compat_ulong_t) regs->a5;
+ cregs->a6 = (compat_ulong_t) regs->a6;
+ cregs->a7 = (compat_ulong_t) regs->a7;
+ cregs->s2 = (compat_ulong_t) regs->s2;
+ cregs->s3 = (compat_ulong_t) regs->s3;
+ cregs->s4 = (compat_ulong_t) regs->s4;
+ cregs->s5 = (compat_ulong_t) regs->s5;
+ cregs->s6 = (compat_ulong_t) regs->s6;
+ cregs->s7 = (compat_ulong_t) regs->s7;
+ cregs->s8 = (compat_ulong_t) regs->s8;
+ cregs->s9 = (compat_ulong_t) regs->s9;
+ cregs->s10 = (compat_ulong_t) regs->s10;
+ cregs->s11 = (compat_ulong_t) regs->s11;
+ cregs->t3 = (compat_ulong_t) regs->t3;
+ cregs->t4 = (compat_ulong_t) regs->t4;
+ cregs->t5 = (compat_ulong_t) regs->t5;
+ cregs->t6 = (compat_ulong_t) regs->t6;
+};
+
+static inline void cregs_to_regs(struct compat_user_regs_struct *cregs,
+ struct pt_regs *regs)
+{
+ regs->epc = (unsigned long) cregs->pc;
+ regs->ra = (unsigned long) cregs->ra;
+ regs->sp = (unsigned long) cregs->sp;
+ regs->gp = (unsigned long) cregs->gp;
+ regs->tp = (unsigned long) cregs->tp;
+ regs->t0 = (unsigned long) cregs->t0;
+ regs->t1 = (unsigned long) cregs->t1;
+ regs->t2 = (unsigned long) cregs->t2;
+ regs->s0 = (unsigned long) cregs->s0;
+ regs->s1 = (unsigned long) cregs->s1;
+ regs->a0 = (unsigned long) cregs->a0;
+ regs->a1 = (unsigned long) cregs->a1;
+ regs->a2 = (unsigned long) cregs->a2;
+ regs->a3 = (unsigned long) cregs->a3;
+ regs->a4 = (unsigned long) cregs->a4;
+ regs->a5 = (unsigned long) cregs->a5;
+ regs->a6 = (unsigned long) cregs->a6;
+ regs->a7 = (unsigned long) cregs->a7;
+ regs->s2 = (unsigned long) cregs->s2;
+ regs->s3 = (unsigned long) cregs->s3;
+ regs->s4 = (unsigned long) cregs->s4;
+ regs->s5 = (unsigned long) cregs->s5;
+ regs->s6 = (unsigned long) cregs->s6;
+ regs->s7 = (unsigned long) cregs->s7;
+ regs->s8 = (unsigned long) cregs->s8;
+ regs->s9 = (unsigned long) cregs->s9;
+ regs->s10 = (unsigned long) cregs->s10;
+ regs->s11 = (unsigned long) cregs->s11;
+ regs->t3 = (unsigned long) cregs->t3;
+ regs->t4 = (unsigned long) cregs->t4;
+ regs->t5 = (unsigned long) cregs->t5;
+ regs->t6 = (unsigned long) cregs->t6;
+};
+
+#endif /* __ASM_COMPAT_H */
diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h
index a8ec3c5c1bd2..134590f1b843 100644
--- a/arch/riscv/include/asm/cpu_ops.h
+++ b/arch/riscv/include/asm/cpu_ops.h
@@ -40,7 +40,5 @@ struct cpu_operations {
extern const struct cpu_operations *cpu_ops[NR_CPUS];
void __init cpu_set_ops(int cpu);
-void cpu_update_secondary_bootdata(unsigned int cpuid,
- struct task_struct *tidle);
#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/riscv/include/asm/cpu_ops_sbi.h b/arch/riscv/include/asm/cpu_ops_sbi.h
new file mode 100644
index 000000000000..56e4b76d09ff
--- /dev/null
+++ b/arch/riscv/include/asm/cpu_ops_sbi.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 by Rivos Inc.
+ */
+#ifndef __ASM_CPU_OPS_SBI_H
+#define __ASM_CPU_OPS_SBI_H
+
+#ifndef __ASSEMBLY__
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/threads.h>
+
+/**
+ * struct sbi_hart_boot_data - Hart specific boot used during booting and
+ * cpu hotplug.
+ * @task_ptr: A pointer to the hart specific tp
+ * @stack_ptr: A pointer to the hart specific sp
+ */
+struct sbi_hart_boot_data {
+ void *task_ptr;
+ void *stack_ptr;
+};
+#endif
+
+#endif /* ifndef __ASM_CPU_OPS_SBI_H */
diff --git a/arch/riscv/include/asm/cpuidle.h b/arch/riscv/include/asm/cpuidle.h
new file mode 100644
index 000000000000..71fdc607d4bc
--- /dev/null
+++ b/arch/riscv/include/asm/cpuidle.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 Allwinner Ltd
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#ifndef _ASM_RISCV_CPUIDLE_H
+#define _ASM_RISCV_CPUIDLE_H
+
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+static inline void cpu_do_idle(void)
+{
+ /*
+ * Add mb() here to ensure that all
+ * IO/MEM accesses are completed prior
+ * to entering WFI.
+ */
+ mb();
+ wait_for_interrupt();
+}
+
+#endif
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index cec462e198ce..6d85655e7edf 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -36,15 +36,28 @@
#define SR_SD _AC(0x8000000000000000, UL) /* FS/XS dirty */
#endif
+#ifdef CONFIG_64BIT
+#define SR_UXL _AC(0x300000000, UL) /* XLEN mask for U-mode */
+#define SR_UXL_32 _AC(0x100000000, UL) /* XLEN = 32 for U-mode */
+#define SR_UXL_64 _AC(0x200000000, UL) /* XLEN = 64 for U-mode */
+#define SR_UXL_SHIFT 32
+#endif
+
/* SATP flags */
#ifndef CONFIG_64BIT
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
-#define SATP_MODE SATP_MODE_32
+#define SATP_ASID_BITS 9
+#define SATP_ASID_SHIFT 22
+#define SATP_ASID_MASK _AC(0x1FF, UL)
#else
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
-#define SATP_MODE SATP_MODE_39
+#define SATP_MODE_48 _AC(0x9000000000000000, UL)
+#define SATP_MODE_57 _AC(0xa000000000000000, UL)
+#define SATP_ASID_BITS 16
+#define SATP_ASID_SHIFT 44
+#define SATP_ASID_MASK _AC(0xFFFF, UL)
#endif
/* Exception cause high bit - is an interrupt if set */
@@ -52,22 +65,33 @@
/* Interrupt causes (minus the high bit) */
#define IRQ_S_SOFT 1
+#define IRQ_VS_SOFT 2
#define IRQ_M_SOFT 3
#define IRQ_S_TIMER 5
+#define IRQ_VS_TIMER 6
#define IRQ_M_TIMER 7
#define IRQ_S_EXT 9
+#define IRQ_VS_EXT 10
#define IRQ_M_EXT 11
+#define IRQ_PMU_OVF 13
/* Exception causes */
#define EXC_INST_MISALIGNED 0
#define EXC_INST_ACCESS 1
+#define EXC_INST_ILLEGAL 2
#define EXC_BREAKPOINT 3
#define EXC_LOAD_ACCESS 5
#define EXC_STORE_ACCESS 7
#define EXC_SYSCALL 8
+#define EXC_HYPERVISOR_SYSCALL 9
+#define EXC_SUPERVISOR_SYSCALL 10
#define EXC_INST_PAGE_FAULT 12
#define EXC_LOAD_PAGE_FAULT 13
#define EXC_STORE_PAGE_FAULT 15
+#define EXC_INST_GUEST_PAGE_FAULT 20
+#define EXC_LOAD_GUEST_PAGE_FAULT 21
+#define EXC_VIRTUAL_INST_FAULT 22
+#define EXC_STORE_GUEST_PAGE_FAULT 23
/* PMP configuration */
#define PMP_R 0x01
@@ -79,13 +103,126 @@
#define PMP_A_NAPOT 0x18
#define PMP_L 0x80
+/* HSTATUS flags */
+#ifdef CONFIG_64BIT
+#define HSTATUS_VSXL _AC(0x300000000, UL)
+#define HSTATUS_VSXL_SHIFT 32
+#endif
+#define HSTATUS_VTSR _AC(0x00400000, UL)
+#define HSTATUS_VTW _AC(0x00200000, UL)
+#define HSTATUS_VTVM _AC(0x00100000, UL)
+#define HSTATUS_VGEIN _AC(0x0003f000, UL)
+#define HSTATUS_VGEIN_SHIFT 12
+#define HSTATUS_HU _AC(0x00000200, UL)
+#define HSTATUS_SPVP _AC(0x00000100, UL)
+#define HSTATUS_SPV _AC(0x00000080, UL)
+#define HSTATUS_GVA _AC(0x00000040, UL)
+#define HSTATUS_VSBE _AC(0x00000020, UL)
+
+/* HGATP flags */
+#define HGATP_MODE_OFF _AC(0, UL)
+#define HGATP_MODE_SV32X4 _AC(1, UL)
+#define HGATP_MODE_SV39X4 _AC(8, UL)
+#define HGATP_MODE_SV48X4 _AC(9, UL)
+#define HGATP_MODE_SV57X4 _AC(10, UL)
+
+#define HGATP32_MODE_SHIFT 31
+#define HGATP32_VMID_SHIFT 22
+#define HGATP32_VMID_MASK _AC(0x1FC00000, UL)
+#define HGATP32_PPN _AC(0x003FFFFF, UL)
+
+#define HGATP64_MODE_SHIFT 60
+#define HGATP64_VMID_SHIFT 44
+#define HGATP64_VMID_MASK _AC(0x03FFF00000000000, UL)
+#define HGATP64_PPN _AC(0x00000FFFFFFFFFFF, UL)
+
+#define HGATP_PAGE_SHIFT 12
+
+#ifdef CONFIG_64BIT
+#define HGATP_PPN HGATP64_PPN
+#define HGATP_VMID_SHIFT HGATP64_VMID_SHIFT
+#define HGATP_VMID_MASK HGATP64_VMID_MASK
+#define HGATP_MODE_SHIFT HGATP64_MODE_SHIFT
+#else
+#define HGATP_PPN HGATP32_PPN
+#define HGATP_VMID_SHIFT HGATP32_VMID_SHIFT
+#define HGATP_VMID_MASK HGATP32_VMID_MASK
+#define HGATP_MODE_SHIFT HGATP32_MODE_SHIFT
+#endif
+
+/* VSIP & HVIP relation */
+#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
+#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \
+ (_AC(1, UL) << IRQ_S_TIMER) | \
+ (_AC(1, UL) << IRQ_S_EXT))
+
/* symbolic CSR names: */
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
#define CSR_INSTRET 0xc02
+#define CSR_HPMCOUNTER3 0xc03
+#define CSR_HPMCOUNTER4 0xc04
+#define CSR_HPMCOUNTER5 0xc05
+#define CSR_HPMCOUNTER6 0xc06
+#define CSR_HPMCOUNTER7 0xc07
+#define CSR_HPMCOUNTER8 0xc08
+#define CSR_HPMCOUNTER9 0xc09
+#define CSR_HPMCOUNTER10 0xc0a
+#define CSR_HPMCOUNTER11 0xc0b
+#define CSR_HPMCOUNTER12 0xc0c
+#define CSR_HPMCOUNTER13 0xc0d
+#define CSR_HPMCOUNTER14 0xc0e
+#define CSR_HPMCOUNTER15 0xc0f
+#define CSR_HPMCOUNTER16 0xc10
+#define CSR_HPMCOUNTER17 0xc11
+#define CSR_HPMCOUNTER18 0xc12
+#define CSR_HPMCOUNTER19 0xc13
+#define CSR_HPMCOUNTER20 0xc14
+#define CSR_HPMCOUNTER21 0xc15
+#define CSR_HPMCOUNTER22 0xc16
+#define CSR_HPMCOUNTER23 0xc17
+#define CSR_HPMCOUNTER24 0xc18
+#define CSR_HPMCOUNTER25 0xc19
+#define CSR_HPMCOUNTER26 0xc1a
+#define CSR_HPMCOUNTER27 0xc1b
+#define CSR_HPMCOUNTER28 0xc1c
+#define CSR_HPMCOUNTER29 0xc1d
+#define CSR_HPMCOUNTER30 0xc1e
+#define CSR_HPMCOUNTER31 0xc1f
#define CSR_CYCLEH 0xc80
#define CSR_TIMEH 0xc81
#define CSR_INSTRETH 0xc82
+#define CSR_HPMCOUNTER3H 0xc83
+#define CSR_HPMCOUNTER4H 0xc84
+#define CSR_HPMCOUNTER5H 0xc85
+#define CSR_HPMCOUNTER6H 0xc86
+#define CSR_HPMCOUNTER7H 0xc87
+#define CSR_HPMCOUNTER8H 0xc88
+#define CSR_HPMCOUNTER9H 0xc89
+#define CSR_HPMCOUNTER10H 0xc8a
+#define CSR_HPMCOUNTER11H 0xc8b
+#define CSR_HPMCOUNTER12H 0xc8c
+#define CSR_HPMCOUNTER13H 0xc8d
+#define CSR_HPMCOUNTER14H 0xc8e
+#define CSR_HPMCOUNTER15H 0xc8f
+#define CSR_HPMCOUNTER16H 0xc90
+#define CSR_HPMCOUNTER17H 0xc91
+#define CSR_HPMCOUNTER18H 0xc92
+#define CSR_HPMCOUNTER19H 0xc93
+#define CSR_HPMCOUNTER20H 0xc94
+#define CSR_HPMCOUNTER21H 0xc95
+#define CSR_HPMCOUNTER22H 0xc96
+#define CSR_HPMCOUNTER23H 0xc97
+#define CSR_HPMCOUNTER24H 0xc98
+#define CSR_HPMCOUNTER25H 0xc99
+#define CSR_HPMCOUNTER26H 0xc9a
+#define CSR_HPMCOUNTER27H 0xc9b
+#define CSR_HPMCOUNTER28H 0xc9c
+#define CSR_HPMCOUNTER29H 0xc9d
+#define CSR_HPMCOUNTER30H 0xc9e
+#define CSR_HPMCOUNTER31H 0xc9f
+
+#define CSR_SSCOUNTOVF 0xda0
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
@@ -98,6 +235,31 @@
#define CSR_SIP 0x144
#define CSR_SATP 0x180
+#define CSR_VSSTATUS 0x200
+#define CSR_VSIE 0x204
+#define CSR_VSTVEC 0x205
+#define CSR_VSSCRATCH 0x240
+#define CSR_VSEPC 0x241
+#define CSR_VSCAUSE 0x242
+#define CSR_VSTVAL 0x243
+#define CSR_VSIP 0x244
+#define CSR_VSATP 0x280
+
+#define CSR_HSTATUS 0x600
+#define CSR_HEDELEG 0x602
+#define CSR_HIDELEG 0x603
+#define CSR_HIE 0x604
+#define CSR_HTIMEDELTA 0x605
+#define CSR_HCOUNTEREN 0x606
+#define CSR_HGEIE 0x607
+#define CSR_HTIMEDELTAH 0x615
+#define CSR_HTVAL 0x643
+#define CSR_HIP 0x644
+#define CSR_HVIP 0x645
+#define CSR_HTINST 0x64a
+#define CSR_HGATP 0x680
+#define CSR_HGEIP 0xe12
+
#define CSR_MSTATUS 0x300
#define CSR_MISA 0x301
#define CSR_MIE 0x304
@@ -109,6 +271,9 @@
#define CSR_MIP 0x344
#define CSR_PMPCFG0 0x3a0
#define CSR_PMPADDR0 0x3b0
+#define CSR_MVENDORID 0xf11
+#define CSR_MARCHID 0xf12
+#define CSR_MIMPID 0xf13
#define CSR_MHARTID 0xf14
#ifdef CONFIG_RISCV_M_MODE
@@ -145,7 +310,10 @@
# define RV_IRQ_SOFT IRQ_S_SOFT
# define RV_IRQ_TIMER IRQ_S_TIMER
# define RV_IRQ_EXT IRQ_S_EXT
-#endif /* CONFIG_RISCV_M_MODE */
+# define RV_IRQ_PMU IRQ_PMU_OVF
+# define SIP_LCOFIP (_AC(0x1, UL) << IRQ_PMU_OVF)
+
+#endif /* !CONFIG_RISCV_M_MODE */
/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
#define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT)
diff --git a/arch/riscv/include/asm/current.h b/arch/riscv/include/asm/current.h
index 1de233d8e8de..21774d868c65 100644
--- a/arch/riscv/include/asm/current.h
+++ b/arch/riscv/include/asm/current.h
@@ -33,6 +33,8 @@ static __always_inline struct task_struct *get_current(void)
#define current get_current()
+register unsigned long current_stack_pointer __asm__("sp");
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_CURRENT_H */
diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
new file mode 100644
index 000000000000..cc4f6787f937
--- /dev/null
+++ b/arch/riscv/include/asm/efi.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef _ASM_EFI_H
+#define _ASM_EFI_H
+
+#include <asm/csr.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/ptrace.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_EFI
+extern void efi_init(void);
+#else
+#define efi_init()
+#endif
+
+int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+
+#define arch_efi_call_virt_setup() efi_virtmap_load()
+#define arch_efi_call_virt_teardown() efi_virtmap_unload()
+
+#define arch_efi_call_virt(p, f, args...) p->f(args)
+
+#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
+
+/* Load initrd anywhere in system RAM */
+static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
+{
+ return ULONG_MAX;
+}
+
+#define alloc_screen_info(x...) (&screen_info)
+
+static inline void free_screen_info(struct screen_info *si)
+{
+}
+
+void efi_virtmap_load(void);
+void efi_virtmap_unload(void);
+
+#endif /* _ASM_EFI_H */
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index d83a4efd052b..14fc7342490b 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -8,27 +8,36 @@
#ifndef _ASM_RISCV_ELF_H
#define _ASM_RISCV_ELF_H
+#include <uapi/linux/elf.h>
+#include <linux/compat.h>
#include <uapi/asm/elf.h>
#include <asm/auxvec.h>
#include <asm/byteorder.h>
+#include <asm/cacheinfo.h>
/*
* These are used to set parameters in the core dumps.
*/
#define ELF_ARCH EM_RISCV
+#ifndef ELF_CLASS
#ifdef CONFIG_64BIT
#define ELF_CLASS ELFCLASS64
#else
#define ELF_CLASS ELFCLASS32
#endif
+#endif
#define ELF_DATA ELFDATA2LSB
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
-#define elf_check_arch(x) ((x)->e_machine == EM_RISCV)
+#define elf_check_arch(x) (((x)->e_machine == EM_RISCV) && \
+ ((x)->e_ident[EI_CLASS] == ELF_CLASS))
+
+extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
+#define compat_elf_check_arch compat_elf_check_arch
#define CORE_DUMP_USE_REGSET
#define ELF_EXEC_PAGESIZE (PAGE_SIZE)
@@ -41,6 +50,15 @@
*/
#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_COMPAT
+#define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \
+ 0x7ff >> (PAGE_SHIFT - 12) : \
+ 0x3ffff >> (PAGE_SHIFT - 12))
+#else
+#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12))
+#endif
+#endif
/*
* This yields a mask that user programs can use to figure out what
* instruction set this CPU supports. This could be done in user space,
@@ -56,11 +74,31 @@ extern unsigned long elf_hwcap;
*/
#define ELF_PLATFORM (NULL)
+#define COMPAT_ELF_PLATFORM (NULL)
+
#ifdef CONFIG_MMU
#define ARCH_DLINFO \
do { \
+ /* \
+ * Note that we add ulong after elf_addr_t because \
+ * casting current->mm->context.vdso triggers a cast \
+ * warning of cast from pointer to integer for \
+ * COMPAT ELFCLASS32. \
+ */ \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
- (elf_addr_t)current->mm->context.vdso); \
+ (elf_addr_t)(ulong)current->mm->context.vdso); \
+ NEW_AUX_ENT(AT_L1I_CACHESIZE, \
+ get_cache_size(1, CACHE_TYPE_INST)); \
+ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, \
+ get_cache_geometry(1, CACHE_TYPE_INST)); \
+ NEW_AUX_ENT(AT_L1D_CACHESIZE, \
+ get_cache_size(1, CACHE_TYPE_DATA)); \
+ NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, \
+ get_cache_geometry(1, CACHE_TYPE_DATA)); \
+ NEW_AUX_ENT(AT_L2_CACHESIZE, \
+ get_cache_size(2, CACHE_TYPE_UNIFIED)); \
+ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, \
+ get_cache_geometry(2, CACHE_TYPE_UNIFIED)); \
} while (0)
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
struct linux_binprm;
@@ -68,4 +106,34 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
#endif /* CONFIG_MMU */
+#define ELF_CORE_COPY_REGS(dest, regs) \
+do { \
+ *(struct user_regs_struct *)&(dest) = \
+ *(struct user_regs_struct *)regs; \
+} while (0);
+
+#ifdef CONFIG_COMPAT
+
+#define SET_PERSONALITY(ex) \
+do { if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \
+ set_thread_flag(TIF_32BIT); \
+ else \
+ clear_thread_flag(TIF_32BIT); \
+ if (personality(current->personality) != PER_LINUX32) \
+ set_personality(PER_LINUX | \
+ (current->personality & (~PER_MASK))); \
+} while (0)
+
+#define COMPAT_ELF_ET_DYN_BASE ((TASK_SIZE_32 / 3) * 2)
+
+/* rv32 registers */
+typedef compat_ulong_t compat_elf_greg_t;
+typedef compat_elf_greg_t compat_elf_gregset_t[ELF_NGREG];
+
+extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
+ int uses_interp);
+#define compat_arch_setup_additional_pages \
+ compat_arch_setup_additional_pages
+
+#endif /* CONFIG_COMPAT */
#endif /* _ASM_RISCV_ELF_H */
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
new file mode 100644
index 000000000000..9e2888dbb5b1
--- /dev/null
+++ b/arch/riscv/include/asm/errata_list.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Sifive.
+ */
+#ifndef ASM_ERRATA_LIST_H
+#define ASM_ERRATA_LIST_H
+
+#include <asm/alternative.h>
+#include <asm/vendorid_list.h>
+
+#ifdef CONFIG_ERRATA_SIFIVE
+#define ERRATA_SIFIVE_CIP_453 0
+#define ERRATA_SIFIVE_CIP_1200 1
+#define ERRATA_SIFIVE_NUMBER 2
+#endif
+
+#ifdef CONFIG_ERRATA_THEAD
+#define ERRATA_THEAD_PBMT 0
+#define ERRATA_THEAD_NUMBER 1
+#endif
+
+#define CPUFEATURE_SVPBMT 0
+#define CPUFEATURE_NUMBER 1
+
+#ifdef __ASSEMBLY__
+
+#define ALT_INSN_FAULT(x) \
+ALTERNATIVE(__stringify(RISCV_PTR do_trap_insn_fault), \
+ __stringify(RISCV_PTR sifive_cip_453_insn_fault_trp), \
+ SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, \
+ CONFIG_ERRATA_SIFIVE_CIP_453)
+
+#define ALT_PAGE_FAULT(x) \
+ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \
+ __stringify(RISCV_PTR sifive_cip_453_page_fault_trp), \
+ SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, \
+ CONFIG_ERRATA_SIFIVE_CIP_453)
+#else /* !__ASSEMBLY__ */
+
+#define ALT_FLUSH_TLB_PAGE(x) \
+asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
+ ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
+ : : "r" (addr) : "memory")
+
+/*
+ * _val is marked as "will be overwritten", so need to set it to 0
+ * in the default case.
+ */
+#define ALT_SVPBMT_SHIFT 61
+#define ALT_THEAD_PBMT_SHIFT 59
+#define ALT_SVPBMT(_val, prot) \
+asm(ALTERNATIVE_2("li %0, 0\t\nnop", \
+ "li %0, %1\t\nslli %0,%0,%3", 0, \
+ CPUFEATURE_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \
+ "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \
+ ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \
+ : "=r"(_val) \
+ : "I"(prot##_SVPBMT >> ALT_SVPBMT_SHIFT), \
+ "I"(prot##_THEAD >> ALT_THEAD_PBMT_SHIFT), \
+ "I"(ALT_SVPBMT_SHIFT), \
+ "I"(ALT_THEAD_PBMT_SHIFT))
+
+#ifdef CONFIG_ERRATA_THEAD_PBMT
+/*
+ * IO/NOCACHE memory types are handled together with svpbmt,
+ * so on T-Head chips, check if no other memory type is set,
+ * and set the non-0 PMA type if applicable.
+ */
+#define ALT_THEAD_PMA(_val) \
+asm volatile(ALTERNATIVE( \
+ "nop\n\t" \
+ "nop\n\t" \
+ "nop\n\t" \
+ "nop\n\t" \
+ "nop\n\t" \
+ "nop\n\t" \
+ "nop", \
+ "li t3, %2\n\t" \
+ "slli t3, t3, %4\n\t" \
+ "and t3, %0, t3\n\t" \
+ "bne t3, zero, 2f\n\t" \
+ "li t3, %3\n\t" \
+ "slli t3, t3, %4\n\t" \
+ "or %0, %0, t3\n\t" \
+ "2:", THEAD_VENDOR_ID, \
+ ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \
+ : "+r"(_val) \
+ : "0"(_val), \
+ "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \
+ "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \
+ "I"(ALT_THEAD_PBMT_SHIFT))
+#else
+#define ALT_THEAD_PMA(_val)
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/riscv/include/asm/extable.h b/arch/riscv/include/asm/extable.h
new file mode 100644
index 000000000000..512012d193dc
--- /dev/null
+++ b/arch/riscv/include/asm/extable.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_EXTABLE_H
+#define _ASM_RISCV_EXTABLE_H
+
+/*
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry {
+ int insn, fixup;
+ short type, data;
+};
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+#define swap_ex_entry_fixup(a, b, tmp, delta) \
+do { \
+ (a)->fixup = (b)->fixup + (delta); \
+ (b)->fixup = (tmp).fixup - (delta); \
+ (a)->type = (b)->type; \
+ (b)->type = (tmp).type; \
+ (a)->data = (b)->data; \
+ (b)->data = (tmp).data; \
+} while (0)
+
+bool fixup_exception(struct pt_regs *regs);
+
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I)
+bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs);
+#else
+static inline bool
+ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ return false;
+}
+#endif
+
+#endif
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 1ff075a8dfc7..5c3e7b97fcc6 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -22,19 +22,29 @@
*/
enum fixed_addresses {
FIX_HOLE,
-#define FIX_FDT_SIZE SZ_1M
- FIX_FDT_END,
- FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
FIX_PTE,
FIX_PMD,
+ FIX_PUD,
+ FIX_P4D,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
+
+ __end_of_permanent_fixed_addresses,
+ /*
+ * Temporary boot-time mappings, used by early_ioremap(),
+ * before ioremap() is functional.
+ */
+#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE)
+#define FIX_BTMAPS_SLOTS 7
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+
__end_of_fixed_addresses
};
-#define FIXMAP_PAGE_IO PAGE_KERNEL
-
#define __early_set_fixmap __set_fixmap
#define __late_set_fixmap __set_fixmap
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index ace8a6e2d11d..04dad3380041 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -13,9 +13,19 @@
#endif
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+/*
+ * Clang prior to 13 had "mcount" instead of "_mcount":
+ * https://reviews.llvm.org/D98881
+ */
+#if defined(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 130000
+#define MCOUNT_NAME _mcount
+#else
+#define MCOUNT_NAME mcount
+#endif
+
#define ARCH_SUPPORTS_FTRACE_OPS 1
#ifndef __ASSEMBLY__
-void _mcount(void);
+void MCOUNT_NAME(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
@@ -36,7 +46,7 @@ struct dyn_arch_ftrace {
* both auipc and jalr at the same time.
*/
-#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define MCOUNT_ADDR ((unsigned long)MCOUNT_NAME)
#define JALR_SIGN_MASK (0x00000800)
#define JALR_OFFSET_MASK (0x00000fff)
#define AUIPC_OFFSET_MASK (0xfffff000)
@@ -66,6 +76,13 @@ do { \
* Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
*/
#define MCOUNT_INSN_SIZE 8
+
+#ifndef __ASSEMBLY__
+struct dyn_ftrace;
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+#endif
+
#endif
#endif /* _ASM_RISCV_FTRACE_H */
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index 1b00badb9f87..fc8130f995c1 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -11,6 +11,7 @@
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <asm/asm.h>
+#include <asm/asm-extable.h>
/* We don't even really need the extable code, but for now keep it simple */
#ifndef CONFIG_MMU
@@ -20,23 +21,14 @@
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
- uintptr_t tmp; \
__enable_user_access(); \
__asm__ __volatile__ ( \
"1: " insn " \n" \
"2: \n" \
- " .section .fixup,\"ax\" \n" \
- " .balign 4 \n" \
- "3: li %[r],%[e] \n" \
- " jump 2b,%[t] \n" \
- " .previous \n" \
- " .section __ex_table,\"a\" \n" \
- " .balign " RISCV_SZPTR " \n" \
- " " RISCV_PTR " 1b, 3b \n" \
- " .previous \n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r]) \
: [r] "+r" (ret), [ov] "=&r" (oldval), \
- [u] "+m" (*uaddr), [t] "=&r" (tmp) \
- : [op] "Jr" (oparg), [e] "i" (-EFAULT) \
+ [u] "+m" (*uaddr) \
+ : [op] "Jr" (oparg) \
: "memory"); \
__disable_user_access(); \
}
@@ -98,18 +90,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
"2: sc.w.aqrl %[t],%z[nv],%[u] \n"
" bnez %[t],1b \n"
"3: \n"
- " .section .fixup,\"ax\" \n"
- " .balign 4 \n"
- "4: li %[r],%[e] \n"
- " jump 3b,%[t] \n"
- " .previous \n"
- " .section __ex_table,\"a\" \n"
- " .balign " RISCV_SZPTR " \n"
- " " RISCV_PTR " 1b, 4b \n"
- " " RISCV_PTR " 2b, 4b \n"
- " .previous \n"
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %[r]) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %[r]) \
: [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp)
- : [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "i" (-EFAULT)
+ : [ov] "Jr" (oldval), [nv] "Jr" (newval)
: "memory");
__disable_user_access();
diff --git a/arch/riscv/include/asm/gdb_xml.h b/arch/riscv/include/asm/gdb_xml.h
index 041b45f5b997..09342111f227 100644
--- a/arch/riscv/include/asm/gdb_xml.h
+++ b/arch/riscv/include/asm/gdb_xml.h
@@ -3,8 +3,7 @@
#ifndef __ASM_GDB_XML_H_
#define __ASM_GDB_XML_H_
-#define kgdb_arch_gdb_stub_feature riscv_gdb_stub_feature
-static const char riscv_gdb_stub_feature[64] =
+const char riscv_gdb_stub_feature[64] =
"PacketSize=800;qXfer:features:read+;";
static const char gdb_xfer_read_target[31] = "qXfer:features:read:target.xml:";
diff --git a/arch/riscv/include/asm/gpr-num.h b/arch/riscv/include/asm/gpr-num.h
new file mode 100644
index 000000000000..dfee2829fc7c
--- /dev/null
+++ b/arch/riscv/include/asm/gpr-num.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_GPR_NUM_H
+#define __ASM_GPR_NUM_H
+
+#ifdef __ASSEMBLY__
+ .equ .L__gpr_num_zero, 0
+ .equ .L__gpr_num_ra, 1
+ .equ .L__gpr_num_sp, 2
+ .equ .L__gpr_num_gp, 3
+ .equ .L__gpr_num_tp, 4
+ .equ .L__gpr_num_t0, 5
+ .equ .L__gpr_num_t1, 6
+ .equ .L__gpr_num_t2, 7
+ .equ .L__gpr_num_s0, 8
+ .equ .L__gpr_num_s1, 9
+ .equ .L__gpr_num_a0, 10
+ .equ .L__gpr_num_a1, 11
+ .equ .L__gpr_num_a2, 12
+ .equ .L__gpr_num_a3, 13
+ .equ .L__gpr_num_a4, 14
+ .equ .L__gpr_num_a5, 15
+ .equ .L__gpr_num_a6, 16
+ .equ .L__gpr_num_a7, 17
+ .equ .L__gpr_num_s2, 18
+ .equ .L__gpr_num_s3, 19
+ .equ .L__gpr_num_s4, 20
+ .equ .L__gpr_num_s5, 21
+ .equ .L__gpr_num_s6, 22
+ .equ .L__gpr_num_s7, 23
+ .equ .L__gpr_num_s8, 24
+ .equ .L__gpr_num_s9, 25
+ .equ .L__gpr_num_s10, 26
+ .equ .L__gpr_num_s11, 27
+ .equ .L__gpr_num_t3, 28
+ .equ .L__gpr_num_t4, 29
+ .equ .L__gpr_num_t5, 30
+ .equ .L__gpr_num_t6, 31
+
+#else /* __ASSEMBLY__ */
+
+#define __DEFINE_ASM_GPR_NUMS \
+" .equ .L__gpr_num_zero, 0\n" \
+" .equ .L__gpr_num_ra, 1\n" \
+" .equ .L__gpr_num_sp, 2\n" \
+" .equ .L__gpr_num_gp, 3\n" \
+" .equ .L__gpr_num_tp, 4\n" \
+" .equ .L__gpr_num_t0, 5\n" \
+" .equ .L__gpr_num_t1, 6\n" \
+" .equ .L__gpr_num_t2, 7\n" \
+" .equ .L__gpr_num_s0, 8\n" \
+" .equ .L__gpr_num_s1, 9\n" \
+" .equ .L__gpr_num_a0, 10\n" \
+" .equ .L__gpr_num_a1, 11\n" \
+" .equ .L__gpr_num_a2, 12\n" \
+" .equ .L__gpr_num_a3, 13\n" \
+" .equ .L__gpr_num_a4, 14\n" \
+" .equ .L__gpr_num_a5, 15\n" \
+" .equ .L__gpr_num_a6, 16\n" \
+" .equ .L__gpr_num_a7, 17\n" \
+" .equ .L__gpr_num_s2, 18\n" \
+" .equ .L__gpr_num_s3, 19\n" \
+" .equ .L__gpr_num_s4, 20\n" \
+" .equ .L__gpr_num_s5, 21\n" \
+" .equ .L__gpr_num_s6, 22\n" \
+" .equ .L__gpr_num_s7, 23\n" \
+" .equ .L__gpr_num_s8, 24\n" \
+" .equ .L__gpr_num_s9, 25\n" \
+" .equ .L__gpr_num_s10, 26\n" \
+" .equ .L__gpr_num_s11, 27\n" \
+" .equ .L__gpr_num_t3, 28\n" \
+" .equ .L__gpr_num_t4, 29\n" \
+" .equ .L__gpr_num_t5, 30\n" \
+" .equ .L__gpr_num_t6, 31\n"
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_GPR_NUM_H */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5ce50468aff1..4e2486881840 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -34,7 +34,34 @@ extern unsigned long elf_hwcap;
#define RISCV_ISA_EXT_s ('s' - 'a')
#define RISCV_ISA_EXT_u ('u' - 'a')
+/*
+ * Increse this to higher value as kernel support more ISA extensions.
+ */
#define RISCV_ISA_EXT_MAX 64
+#define RISCV_ISA_EXT_NAME_LEN_MAX 32
+
+/* The base ID for multi-letter ISA extensions */
+#define RISCV_ISA_EXT_BASE 26
+
+/*
+ * This enum represent the logical ID for each multi-letter RISC-V ISA extension.
+ * The logical ID should start from RISCV_ISA_EXT_BASE and must not exceed
+ * RISCV_ISA_EXT_MAX. 0-25 range is reserved for single letter
+ * extensions while all the multi-letter extensions should define the next
+ * available logical extension id.
+ */
+enum riscv_isa_ext_id {
+ RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
+ RISCV_ISA_EXT_SVPBMT,
+ RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
+};
+
+struct riscv_isa_ext_data {
+ /* Name of the extension displayed to userspace via /proc/cpuinfo */
+ char uprop[RISCV_ISA_EXT_NAME_LEN_MAX];
+ /* The logical ISA extension ID */
+ unsigned int isa_ext_id;
+};
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 3835c3295dc5..69605a474270 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/pgtable.h>
#include <asm/mmiowb.h>
+#include <asm/early_ioremap.h>
/*
* MMIO access functions are separated out to break dependency cycles
@@ -51,19 +52,6 @@
#define __io_pbw() __asm__ __volatile__ ("fence iow,o" : : : "memory");
#define __io_paw() __asm__ __volatile__ ("fence o,io" : : : "memory");
-#define inb(c) ({ u8 __v; __io_pbr(); __v = readb_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; })
-#define inw(c) ({ u16 __v; __io_pbr(); __v = readw_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; })
-#define inl(c) ({ u32 __v; __io_pbr(); __v = readl_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; })
-
-#define outb(v,c) ({ __io_pbw(); writeb_cpu((v),(void*)(PCI_IOBASE + (c))); __io_paw(); })
-#define outw(v,c) ({ __io_pbw(); writew_cpu((v),(void*)(PCI_IOBASE + (c))); __io_paw(); })
-#define outl(v,c) ({ __io_pbw(); writel_cpu((v),(void*)(PCI_IOBASE + (c))); __io_paw(); })
-
-#ifdef CONFIG_64BIT
-#define inq(c) ({ u64 __v; __io_pbr(); __v = readq_cpu((void*)(c)); __io_par(__v); __v; })
-#define outq(v,c) ({ __io_pbw(); writeq_cpu((v),(void*)(c)); __io_paw(); })
-#endif
-
/*
* Accesses from a single hart to a single I/O address must be ordered. This
* allows us to use the raw read macros, but we still need to fence before and
diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h
index 9807ad164015..e4c435509983 100644
--- a/arch/riscv/include/asm/irq.h
+++ b/arch/riscv/include/asm/irq.h
@@ -12,4 +12,6 @@
#include <asm-generic/irq.h>
+extern void __init init_IRQ(void);
+
#endif /* _ASM_RISCV_IRQ_H */
diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h
new file mode 100644
index 000000000000..b53891964ae0
--- /dev/null
+++ b/arch/riscv/include/asm/irq_work.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_IRQ_WORK_H
+#define _ASM_RISCV_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+ return IS_ENABLED(CONFIG_SMP);
+}
+extern void arch_irq_work_raise(void);
+#endif /* _ASM_RISCV_IRQ_WORK_H */
diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h
new file mode 100644
index 000000000000..38af2ec7b9bf
--- /dev/null
+++ b/arch/riscv/include/asm/jump_label.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Emil Renner Berthing
+ *
+ * Based on arch/arm64/include/asm/jump_label.h
+ */
+#ifndef __ASM_JUMP_LABEL_H
+#define __ASM_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/asm.h>
+
+#define JUMP_LABEL_NOP_SIZE 4
+
+static __always_inline bool arch_static_branch(struct static_key *key,
+ bool branch)
+{
+ asm_volatile_goto(
+ " .option push \n\t"
+ " .option norelax \n\t"
+ " .option norvc \n\t"
+ "1: nop \n\t"
+ " .option pop \n\t"
+ " .pushsection __jump_table, \"aw\" \n\t"
+ " .align " RISCV_LGPTR " \n\t"
+ " .long 1b - ., %l[label] - . \n\t"
+ " " RISCV_PTR " %0 - . \n\t"
+ " .popsection \n\t"
+ : : "i"(&((char *)key)[branch]) : : label);
+
+ return false;
+label:
+ return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key,
+ bool branch)
+{
+ asm_volatile_goto(
+ " .option push \n\t"
+ " .option norelax \n\t"
+ " .option norvc \n\t"
+ "1: jal zero, %l[label] \n\t"
+ " .option pop \n\t"
+ " .pushsection __jump_table, \"aw\" \n\t"
+ " .align " RISCV_LGPTR " \n\t"
+ " .long 1b - ., %l[label] - . \n\t"
+ " " RISCV_PTR " %0 - . \n\t"
+ " .popsection \n\t"
+ : : "i"(&((char *)key)[branch]) : : label);
+
+ return false;
+label:
+ return true;
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h
index b04028c6218c..0b85e363e778 100644
--- a/arch/riscv/include/asm/kasan.h
+++ b/arch/riscv/include/asm/kasan.h
@@ -8,17 +8,37 @@
#ifdef CONFIG_KASAN
+/*
+ * The following comment was copied from arm64:
+ * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
+ * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses,
+ * where N = (1 << KASAN_SHADOW_SCALE_SHIFT).
+ *
+ * KASAN_SHADOW_OFFSET:
+ * This value is used to map an address to the corresponding shadow
+ * address by the following formula:
+ * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
+ *
+ * (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range
+ * [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual
+ * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation:
+ * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END -
+ * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT))
+ */
#define KASAN_SHADOW_SCALE_SHIFT 3
-#define KASAN_SHADOW_SIZE (UL(1) << (38 - KASAN_SHADOW_SCALE_SHIFT))
-#define KASAN_SHADOW_START KERN_VIRT_START /* 2^64 - 2^38 */
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
-
-#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \
- (64 - KASAN_SHADOW_SCALE_SHIFT)))
+#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
+/*
+ * Depending on the size of the virtual address space, the region may not be
+ * aligned on PGDIR_SIZE, so force its alignment to ease its population.
+ */
+#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK)
+#define KASAN_SHADOW_END MODULES_LOWEST_VADDR
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
void kasan_init(void);
asmlinkage void kasan_early_init(void);
+void kasan_swapper_init(void);
#endif
#endif
diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h
new file mode 100644
index 000000000000..eee260e8ab30
--- /dev/null
+++ b/arch/riscv/include/asm/kexec.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
+ */
+
+#ifndef _RISCV_KEXEC_H
+#define _RISCV_KEXEC_H
+
+#include <asm/page.h> /* For PAGE_SIZE */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* Reserve a page for the control code buffer */
+#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
+
+#define KEXEC_ARCH KEXEC_ARCH_RISCV
+
+extern void riscv_crash_save_regs(struct pt_regs *newregs);
+
+static inline void
+crash_setup_regs(struct pt_regs *newregs,
+ struct pt_regs *oldregs)
+{
+ if (oldregs)
+ memcpy(newregs, oldregs, sizeof(struct pt_regs));
+ else
+ riscv_crash_save_regs(newregs);
+}
+
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ unsigned long fdt_addr;
+};
+
+extern const unsigned char riscv_kexec_relocate[];
+extern const unsigned int riscv_kexec_relocate_size;
+
+typedef void (*riscv_kexec_method)(unsigned long first_ind_entry,
+ unsigned long jump_addr,
+ unsigned long fdt_addr,
+ unsigned long hartid,
+ unsigned long va_pa_off);
+
+extern riscv_kexec_method riscv_kexec_norelocate;
+
+#ifdef CONFIG_KEXEC_FILE
+extern const struct kexec_file_ops elf_kexec_ops;
+
+struct purgatory_info;
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab);
+#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
+#endif
+
+#endif
diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h
new file mode 100644
index 000000000000..d887a54042aa
--- /dev/null
+++ b/arch/riscv/include/asm/kfence.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_KFENCE_H
+#define _ASM_RISCV_KFENCE_H
+
+#include <linux/kfence.h>
+#include <linux/pfn.h>
+#include <asm-generic/pgalloc.h>
+#include <asm/pgtable.h>
+
+static inline int split_pmd_page(unsigned long addr)
+{
+ int i;
+ unsigned long pfn = PFN_DOWN(__pa((addr & PMD_MASK)));
+ pmd_t *pmd = pmd_off_k(addr);
+ pte_t *pte = pte_alloc_one_kernel(&init_mm);
+
+ if (!pte)
+ return -ENOMEM;
+
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(pte + i, pfn_pte(pfn + i, PAGE_KERNEL));
+ set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(pte)), PAGE_TABLE));
+
+ flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+ return 0;
+}
+
+static inline bool arch_kfence_init_pool(void)
+{
+ int ret;
+ unsigned long addr;
+ pmd_t *pmd;
+
+ for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
+ addr += PAGE_SIZE) {
+ pmd = pmd_off_k(addr);
+
+ if (pmd_leaf(*pmd)) {
+ ret = split_pmd_page(addr);
+ if (ret)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ pte_t *pte = virt_to_kpte(addr);
+
+ if (protect)
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+ else
+ set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ return true;
+}
+
+#endif /* _ASM_RISCV_KFENCE_H */
diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h
index 8177a457caff..46677daf708b 100644
--- a/arch/riscv/include/asm/kgdb.h
+++ b/arch/riscv/include/asm/kgdb.h
@@ -19,7 +19,6 @@
#ifndef __ASSEMBLY__
-extern int kgdb_has_hit_break(unsigned long addr);
extern unsigned long kgdb_compiled_break;
static inline void arch_kgdb_breakpoint(void)
@@ -106,7 +105,9 @@ static inline void arch_kgdb_breakpoint(void)
#define DBG_REG_BADADDR_OFF 34
#define DBG_REG_CAUSE_OFF 35
-#include <asm/gdb_xml.h>
+extern const char riscv_gdb_stub_feature[64];
+
+#define kgdb_arch_gdb_stub_feature riscv_gdb_stub_feature
#endif
#endif
diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h
index 56a98ea30731..217ef89f22b9 100644
--- a/arch/riscv/include/asm/kprobes.h
+++ b/arch/riscv/include/asm/kprobes.h
@@ -11,4 +11,37 @@
#include <asm-generic/kprobes.h>
+#ifdef CONFIG_KPROBES
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE 2
+
+#define flush_insn_slot(p) do { } while (0)
+#define kretprobe_blacklist_size 0
+
+#include <asm/probes.h>
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned int status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+ unsigned int kprobe_status;
+ unsigned long saved_status;
+ struct prev_kprobe prev_kprobe;
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr);
+bool kprobe_breakpoint_handler(struct pt_regs *regs);
+bool kprobe_single_step_handler(struct pt_regs *regs);
+void __kretprobe_trampoline(void);
+void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
#endif /* _ASM_RISCV_KPROBES_H */
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
new file mode 100644
index 000000000000..319c8aeb42af
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -0,0 +1,330 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#ifndef __RISCV_KVM_HOST_H__
+#define __RISCV_KVM_HOST_H__
+
+#include <linux/types.h>
+#include <linux/kvm.h>
+#include <linux/kvm_types.h>
+#include <linux/spinlock.h>
+#include <asm/csr.h>
+#include <asm/kvm_vcpu_fp.h>
+#include <asm/kvm_vcpu_timer.h>
+
+#define KVM_MAX_VCPUS 1024
+
+#define KVM_HALT_POLL_NS_DEFAULT 500000
+
+#define KVM_VCPU_MAX_FEATURES 0
+
+#define KVM_REQ_SLEEP \
+ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
+#define KVM_REQ_UPDATE_HGATP KVM_ARCH_REQ(2)
+#define KVM_REQ_FENCE_I \
+ KVM_ARCH_REQ_FLAGS(3, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HFENCE_GVMA_VMID_ALL KVM_REQ_TLB_FLUSH
+#define KVM_REQ_HFENCE_VVMA_ALL \
+ KVM_ARCH_REQ_FLAGS(4, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HFENCE \
+ KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+
+enum kvm_riscv_hfence_type {
+ KVM_RISCV_HFENCE_UNKNOWN = 0,
+ KVM_RISCV_HFENCE_GVMA_VMID_GPA,
+ KVM_RISCV_HFENCE_VVMA_ASID_GVA,
+ KVM_RISCV_HFENCE_VVMA_ASID_ALL,
+ KVM_RISCV_HFENCE_VVMA_GVA,
+};
+
+struct kvm_riscv_hfence {
+ enum kvm_riscv_hfence_type type;
+ unsigned long asid;
+ unsigned long order;
+ gpa_t addr;
+ gpa_t size;
+};
+
+#define KVM_RISCV_VCPU_MAX_HFENCE 64
+
+struct kvm_vm_stat {
+ struct kvm_vm_stat_generic generic;
+};
+
+struct kvm_vcpu_stat {
+ struct kvm_vcpu_stat_generic generic;
+ u64 ecall_exit_stat;
+ u64 wfi_exit_stat;
+ u64 mmio_exit_user;
+ u64 mmio_exit_kernel;
+ u64 exits;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct kvm_vmid {
+ /*
+ * Writes to vmid_version and vmid happen with vmid_lock held
+ * whereas reads happen without any lock held.
+ */
+ unsigned long vmid_version;
+ unsigned long vmid;
+};
+
+struct kvm_arch {
+ /* G-stage vmid */
+ struct kvm_vmid vmid;
+
+ /* G-stage page table */
+ pgd_t *pgd;
+ phys_addr_t pgd_phys;
+
+ /* Guest Timer */
+ struct kvm_guest_timer timer;
+};
+
+struct kvm_mmio_decode {
+ unsigned long insn;
+ int insn_len;
+ int len;
+ int shift;
+ int return_handled;
+};
+
+struct kvm_sbi_context {
+ int return_handled;
+};
+
+struct kvm_cpu_trap {
+ unsigned long sepc;
+ unsigned long scause;
+ unsigned long stval;
+ unsigned long htval;
+ unsigned long htinst;
+};
+
+struct kvm_cpu_context {
+ unsigned long zero;
+ unsigned long ra;
+ unsigned long sp;
+ unsigned long gp;
+ unsigned long tp;
+ unsigned long t0;
+ unsigned long t1;
+ unsigned long t2;
+ unsigned long s0;
+ unsigned long s1;
+ unsigned long a0;
+ unsigned long a1;
+ unsigned long a2;
+ unsigned long a3;
+ unsigned long a4;
+ unsigned long a5;
+ unsigned long a6;
+ unsigned long a7;
+ unsigned long s2;
+ unsigned long s3;
+ unsigned long s4;
+ unsigned long s5;
+ unsigned long s6;
+ unsigned long s7;
+ unsigned long s8;
+ unsigned long s9;
+ unsigned long s10;
+ unsigned long s11;
+ unsigned long t3;
+ unsigned long t4;
+ unsigned long t5;
+ unsigned long t6;
+ unsigned long sepc;
+ unsigned long sstatus;
+ unsigned long hstatus;
+ union __riscv_fp_state fp;
+};
+
+struct kvm_vcpu_csr {
+ unsigned long vsstatus;
+ unsigned long vsie;
+ unsigned long vstvec;
+ unsigned long vsscratch;
+ unsigned long vsepc;
+ unsigned long vscause;
+ unsigned long vstval;
+ unsigned long hvip;
+ unsigned long vsatp;
+ unsigned long scounteren;
+};
+
+struct kvm_vcpu_arch {
+ /* VCPU ran at least once */
+ bool ran_atleast_once;
+
+ /* Last Host CPU on which Guest VCPU exited */
+ int last_exit_cpu;
+
+ /* ISA feature bits (similar to MISA) */
+ unsigned long isa;
+
+ /* SSCRATCH, STVEC, and SCOUNTEREN of Host */
+ unsigned long host_sscratch;
+ unsigned long host_stvec;
+ unsigned long host_scounteren;
+
+ /* CPU context of Host */
+ struct kvm_cpu_context host_context;
+
+ /* CPU context of Guest VCPU */
+ struct kvm_cpu_context guest_context;
+
+ /* CPU CSR context of Guest VCPU */
+ struct kvm_vcpu_csr guest_csr;
+
+ /* CPU context upon Guest VCPU reset */
+ struct kvm_cpu_context guest_reset_context;
+
+ /* CPU CSR context upon Guest VCPU reset */
+ struct kvm_vcpu_csr guest_reset_csr;
+
+ /*
+ * VCPU interrupts
+ *
+ * We have a lockless approach for tracking pending VCPU interrupts
+ * implemented using atomic bitops. The irqs_pending bitmap represent
+ * pending interrupts whereas irqs_pending_mask represent bits changed
+ * in irqs_pending. Our approach is modeled around multiple producer
+ * and single consumer problem where the consumer is the VCPU itself.
+ */
+ unsigned long irqs_pending;
+ unsigned long irqs_pending_mask;
+
+ /* VCPU Timer */
+ struct kvm_vcpu_timer timer;
+
+ /* HFENCE request queue */
+ spinlock_t hfence_lock;
+ unsigned long hfence_head;
+ unsigned long hfence_tail;
+ struct kvm_riscv_hfence hfence_queue[KVM_RISCV_VCPU_MAX_HFENCE];
+
+ /* MMIO instruction details */
+ struct kvm_mmio_decode mmio_decode;
+
+ /* SBI context */
+ struct kvm_sbi_context sbi_context;
+
+ /* Cache pages needed to program page tables with spinlock held */
+ struct kvm_mmu_memory_cache mmu_page_cache;
+
+ /* VCPU power-off state */
+ bool power_off;
+
+ /* Don't run the VCPU (blocked) */
+ bool pause;
+};
+
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
+#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
+
+void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order);
+void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid);
+void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz,
+ unsigned long order);
+void kvm_riscv_local_hfence_gvma_all(void);
+void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid,
+ unsigned long asid,
+ unsigned long gva,
+ unsigned long gvsz,
+ unsigned long order);
+void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid,
+ unsigned long asid);
+void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order);
+void kvm_riscv_local_hfence_vvma_all(unsigned long vmid);
+
+void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu);
+
+void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu);
+void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu);
+void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu);
+void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu);
+
+void kvm_riscv_fence_i(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask);
+void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order);
+void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask);
+void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order, unsigned long asid);
+void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long asid);
+void kvm_riscv_hfence_vvma_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order);
+void kvm_riscv_hfence_vvma_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask);
+
+int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
+ struct kvm_memory_slot *memslot,
+ gpa_t gpa, unsigned long hva, bool is_write);
+int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm);
+void kvm_riscv_gstage_free_pgd(struct kvm *kvm);
+void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu);
+void kvm_riscv_gstage_mode_detect(void);
+unsigned long kvm_riscv_gstage_mode(void);
+int kvm_riscv_gstage_gpa_bits(void);
+
+void kvm_riscv_gstage_vmid_detect(void);
+unsigned long kvm_riscv_gstage_vmid_bits(void);
+int kvm_riscv_gstage_vmid_init(struct kvm *kvm);
+bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid);
+void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu);
+
+void __kvm_riscv_unpriv_trap(void);
+
+void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu);
+unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
+ bool read_insn,
+ unsigned long guest_addr,
+ struct kvm_cpu_trap *trap);
+void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_trap *trap);
+int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap);
+
+void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
+
+int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
+int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
+void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
+bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask);
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+
+int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/include/asm/kvm_types.h b/arch/riscv/include/asm/kvm_types.h
new file mode 100644
index 000000000000..e15765f98d7a
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_types.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_KVM_TYPES_H
+#define _ASM_RISCV_KVM_TYPES_H
+
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 32
+
+#endif /* _ASM_RISCV_KVM_TYPES_H */
diff --git a/arch/riscv/include/asm/kvm_vcpu_fp.h b/arch/riscv/include/asm/kvm_vcpu_fp.h
new file mode 100644
index 000000000000..4da9b8e0f050
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_fp.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#ifndef __KVM_VCPU_RISCV_FP_H
+#define __KVM_VCPU_RISCV_FP_H
+
+#include <linux/types.h>
+
+struct kvm_cpu_context;
+
+#ifdef CONFIG_FPU
+void __kvm_riscv_fp_f_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_f_restore(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_restore(struct kvm_cpu_context *context);
+
+void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
+ unsigned long isa);
+void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
+ unsigned long isa);
+void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx);
+void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx);
+#else
+static inline void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
+{
+}
+static inline void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+}
+static inline void kvm_riscv_vcpu_guest_fp_restore(
+ struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+}
+static inline void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
+{
+}
+static inline void kvm_riscv_vcpu_host_fp_restore(
+ struct kvm_cpu_context *cntx)
+{
+}
+#endif
+
+int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype);
+int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype);
+
+#endif
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
new file mode 100644
index 000000000000..83d6d4d2b1df
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/**
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#ifndef __RISCV_KVM_VCPU_SBI_H__
+#define __RISCV_KVM_VCPU_SBI_H__
+
+#define KVM_SBI_IMPID 3
+
+#define KVM_SBI_VERSION_MAJOR 0
+#define KVM_SBI_VERSION_MINOR 3
+
+struct kvm_vcpu_sbi_extension {
+ unsigned long extid_start;
+ unsigned long extid_end;
+ /**
+ * SBI extension handler. It can be defined for a given extension or group of
+ * extension. But it should always return linux error codes rather than SBI
+ * specific error codes.
+ */
+ int (*handler)(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val, struct kvm_cpu_trap *utrap,
+ bool *exit);
+};
+
+void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run);
+void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ u32 type, u64 flags);
+const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid);
+
+#endif /* __RISCV_KVM_VCPU_SBI_H__ */
diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h
new file mode 100644
index 000000000000..375281eb49e0
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_timer.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#ifndef __KVM_VCPU_RISCV_TIMER_H
+#define __KVM_VCPU_RISCV_TIMER_H
+
+#include <linux/hrtimer.h>
+
+struct kvm_guest_timer {
+ /* Mult & Shift values to get nanoseconds from cycles */
+ u32 nsec_mult;
+ u32 nsec_shift;
+ /* Time delta value */
+ u64 time_delta;
+};
+
+struct kvm_vcpu_timer {
+ /* Flag for whether init is done */
+ bool init_done;
+ /* Flag for whether timer event is configured */
+ bool next_set;
+ /* Next timer event cycles */
+ u64 next_cycles;
+ /* Underlying hrtimer instance */
+ struct hrtimer hrt;
+};
+
+int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles);
+int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu);
+int kvm_riscv_guest_timer_init(struct kvm *kvm);
+
+#endif
diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h
index 56053c9838b2..aff6c33ab0c0 100644
--- a/arch/riscv/include/asm/mmio.h
+++ b/arch/riscv/include/asm/mmio.h
@@ -14,12 +14,6 @@
#include <linux/types.h>
#include <asm/mmiowb.h>
-#ifndef CONFIG_MMU
-#define pgprot_noncached(x) (x)
-#define pgprot_writecombine(x) (x)
-#define pgprot_device(x) (x)
-#endif /* CONFIG_MMU */
-
/* Generic IO read/write. These perform native-endian accesses. */
#define __raw_writeb __raw_writeb
static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 967eacb01ab5..cedcf8ea3c76 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -12,14 +12,19 @@
typedef struct {
#ifndef CONFIG_MMU
unsigned long end_brk;
+#else
+ atomic_long_t id;
#endif
void *vdso;
+ void *vdso_info;
#ifdef CONFIG_SMP
/* A local icache flush is needed before user execution can resume. */
cpumask_t icache_stale_mask;
#endif
} mm_context_t;
+void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_MMU_H */
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
index 67c463812e2d..7030837adc1a 100644
--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -13,34 +13,28 @@
#include <linux/mm.h>
#include <linux/sched.h>
-static inline void enter_lazy_tlb(struct mm_struct *mm,
- struct task_struct *task)
-{
-}
-
-/* Initialize context-related info for a new mm_struct */
-static inline int init_new_context(struct task_struct *task,
- struct mm_struct *mm)
-{
- return 0;
-}
-
-static inline void destroy_context(struct mm_struct *mm)
-{
-}
-
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *task);
+#define activate_mm activate_mm
static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
switch_mm(prev, next, NULL);
}
-static inline void deactivate_mm(struct task_struct *task,
- struct mm_struct *mm)
+#define init_new_context init_new_context
+static inline int init_new_context(struct task_struct *tsk,
+ struct mm_struct *mm)
{
+#ifdef CONFIG_MMU
+ atomic_long_set(&mm->context.id, 0);
+#endif
+ return 0;
}
+DECLARE_STATIC_KEY_FALSE(use_asid_allocator);
+
+#include <asm-generic/mmu_context.h>
+
#endif /* _ASM_RISCV_MMU_CONTEXT_H */
diff --git a/arch/riscv/include/asm/mmzone.h b/arch/riscv/include/asm/mmzone.h
new file mode 100644
index 000000000000..fa17e01d9ab2
--- /dev/null
+++ b/arch/riscv/include/asm/mmzone.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMZONE_H
+#define __ASM_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+#include <asm/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[(nid)])
+
+#endif /* CONFIG_NUMA */
+#endif /* __ASM_MMZONE_H */
diff --git a/arch/riscv/kernel/module.lds b/arch/riscv/include/asm/module.lds.h
index 295ecfb341a2..1075beae1ac6 100644
--- a/arch/riscv/kernel/module.lds
+++ b/arch/riscv/include/asm/module.lds.h
@@ -1,8 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2017 Andes Technology Corporation */
-
+#ifdef CONFIG_MODULE_SECTIONS
SECTIONS {
- .plt (NOLOAD) : { BYTE(0) }
- .got (NOLOAD) : { BYTE(0) }
- .got.plt (NOLOAD) : { BYTE(0) }
+ .plt : { BYTE(0) }
+ .got : { BYTE(0) }
+ .got.plt : { BYTE(0) }
}
+#endif
diff --git a/arch/riscv/include/asm/numa.h b/arch/riscv/include/asm/numa.h
new file mode 100644
index 000000000000..8c8cf4297cc3
--- /dev/null
+++ b/arch/riscv/include/asm/numa.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_NUMA_H
+#define __ASM_NUMA_H
+
+#include <asm/topology.h>
+#include <asm-generic/numa.h>
+
+#endif /* __ASM_NUMA_H */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 2d50f76efe48..1526e410e802 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -31,22 +31,24 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_MMU
+#define PAGE_OFFSET kernel_map.page_offset
+#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
-
-#define KERN_VIRT_SIZE (-PAGE_OFFSET)
+#endif
+/*
+ * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
+ * define the PAGE_OFFSET value for SV39.
+ */
+#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL)
+#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
+#else
+#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
+#endif /* CONFIG_64BIT */
#ifndef __ASSEMBLY__
-#define PAGE_UP(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
-#define PAGE_DOWN(addr) ((addr)&(~((PAGE_SIZE)-1)))
-
-/* align addr on a size boundary - adjust address up/down if needed */
-#define _ALIGN_UP(addr, size) (((addr)+((size)-1))&(~((size)-1)))
-#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
-
-/* align addr on a size boundary - adjust address up if needed */
-#define _ALIGN(addr, size) _ALIGN_UP(addr, size)
-
#define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE)
#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE)
@@ -89,19 +91,59 @@ typedef struct page *pgtable_t;
#endif
#ifdef CONFIG_MMU
-extern unsigned long va_pa_offset;
-extern unsigned long pfn_base;
-#define ARCH_PFN_OFFSET (pfn_base)
+extern unsigned long riscv_pfn_base;
+#define ARCH_PFN_OFFSET (riscv_pfn_base)
#else
-#define va_pa_offset 0
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */
-extern unsigned long max_low_pfn;
-extern unsigned long min_low_pfn;
-
-#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
-#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
+struct kernel_mapping {
+ unsigned long page_offset;
+ unsigned long virt_addr;
+ uintptr_t phys_addr;
+ uintptr_t size;
+ /* Offset between linear mapping virtual address and kernel load address */
+ unsigned long va_pa_offset;
+ /* Offset between kernel mapping virtual address and kernel load address */
+ unsigned long va_kernel_pa_offset;
+ unsigned long va_kernel_xip_pa_offset;
+#ifdef CONFIG_XIP_KERNEL
+ uintptr_t xiprom;
+ uintptr_t xiprom_sz;
+#endif
+};
+
+extern struct kernel_mapping kernel_map;
+extern phys_addr_t phys_ram_base;
+
+#define is_kernel_mapping(x) \
+ ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
+
+#define is_linear_mapping(x) \
+ ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))
+
+#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
+#define kernel_mapping_pa_to_va(y) ({ \
+ unsigned long _y = y; \
+ (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \
+ (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) : \
+ (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \
+ })
+#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x)
+
+#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset)
+#define kernel_mapping_va_to_pa(y) ({ \
+ unsigned long _y = y; \
+ (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \
+ ((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) : \
+ ((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \
+ })
+
+#define __va_to_pa_nodebug(x) ({ \
+ unsigned long _x = x; \
+ is_linear_mapping(_x) ? \
+ linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x); \
+ })
#ifdef CONFIG_DEBUG_VIRTUAL
extern phys_addr_t __virt_to_phys(unsigned long x);
@@ -128,6 +170,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
#define page_to_bus(page) (page_to_phys(page))
#define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr)))
+#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
+
#ifdef CONFIG_FLATMEM
#define pfn_valid(pfn) \
(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
@@ -135,7 +179,10 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
#endif /* __ASSEMBLY__ */
-#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr)))
+#define virt_addr_valid(vaddr) ({ \
+ unsigned long _addr = (unsigned long)vaddr; \
+ (unsigned long)(_addr) >= PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr)); \
+})
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h
index 1c473a1bd986..7fd52a30e605 100644
--- a/arch/riscv/include/asm/pci.h
+++ b/arch/riscv/include/asm/pci.h
@@ -18,6 +18,8 @@
/* RISC-V shim does not initialize PCI bus */
#define pcibios_assign_all_busses() 1
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
+
extern int isa_dma_bridge_buggy;
#ifdef CONFIG_PCI
@@ -32,6 +34,20 @@ static inline int pci_proc_domain(struct pci_bus *bus)
/* always show the domain in /proc */
return 1;
}
+
+#ifdef CONFIG_NUMA
+
+static inline int pcibus_to_node(struct pci_bus *bus)
+{
+ return dev_to_node(&bus->dev);
+}
+#ifndef cpumask_of_pcibus
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+ cpu_all_mask : \
+ cpumask_of_node(pcibus_to_node(bus)))
+#endif
+#endif /* CONFIG_NUMA */
+
#endif /* CONFIG_PCI */
#endif /* _ASM_RISCV_PCI_H */
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index 062efd3a1d5d..d42c901f9a97 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -9,77 +9,5 @@
#define _ASM_RISCV_PERF_EVENT_H
#include <linux/perf_event.h>
-#include <linux/ptrace.h>
-#include <linux/interrupt.h>
-
-#ifdef CONFIG_RISCV_BASE_PMU
-#define RISCV_BASE_COUNTERS 2
-
-/*
- * The RISCV_MAX_COUNTERS parameter should be specified.
- */
-
-#define RISCV_MAX_COUNTERS 2
-
-/*
- * These are the indexes of bits in counteren register *minus* 1,
- * except for cycle. It would be coherent if it can directly mapped
- * to counteren bit definition, but there is a *time* register at
- * counteren[1]. Per-cpu structure is scarce resource here.
- *
- * According to the spec, an implementation can support counter up to
- * mhpmcounter31, but many high-end processors has at most 6 general
- * PMCs, we give the definition to MHPMCOUNTER8 here.
- */
-#define RISCV_PMU_CYCLE 0
-#define RISCV_PMU_INSTRET 1
-#define RISCV_PMU_MHPMCOUNTER3 2
-#define RISCV_PMU_MHPMCOUNTER4 3
-#define RISCV_PMU_MHPMCOUNTER5 4
-#define RISCV_PMU_MHPMCOUNTER6 5
-#define RISCV_PMU_MHPMCOUNTER7 6
-#define RISCV_PMU_MHPMCOUNTER8 7
-
-#define RISCV_OP_UNSUPP (-EOPNOTSUPP)
-
-struct cpu_hw_events {
- /* # currently enabled events*/
- int n_events;
- /* currently enabled events */
- struct perf_event *events[RISCV_MAX_COUNTERS];
- /* vendor-defined PMU data */
- void *platform;
-};
-
-struct riscv_pmu {
- struct pmu *pmu;
-
- /* generic hw/cache events table */
- const int *hw_events;
- const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX];
- /* method used to map hw/cache events */
- int (*map_hw_event)(u64 config);
- int (*map_cache_event)(u64 config);
-
- /* max generic hw events in map */
- int max_events;
- /* number total counters, 2(base) + x(general) */
- int num_counters;
- /* the width of the counter */
- int counter_width;
-
- /* vendor-defined PMU features */
- void *platform;
-
- irqreturn_t (*handle_irq)(int irq_num, void *dev);
- int irq;
-};
-
-#endif
-#ifdef CONFIG_PERF_EVENTS
#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
-#endif
-
#endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 3f601ee8233f..947f23d7b6af 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -11,7 +11,9 @@
#include <asm/tlb.h>
#ifdef CONFIG_MMU
-#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+#define __HAVE_ARCH_PUD_ALLOC_ONE
+#define __HAVE_ARCH_PUD_FREE
+#include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
@@ -36,9 +38,94 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
-#endif /* __PAGETABLE_PMD_FOLDED */
-#define pmd_pgtable(pmd) pmd_page(pmd)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
+ pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d_safe(p4d,
+ __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+ if (pgtable_l5_enabled) {
+ unsigned long pfn = virt_to_pfn(p4d);
+
+ set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
+ p4d_t *p4d)
+{
+ if (pgtable_l5_enabled) {
+ unsigned long pfn = virt_to_pfn(p4d);
+
+ set_pgd_safe(pgd,
+ __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+#define pud_alloc_one pud_alloc_one
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ if (pgtable_l4_enabled)
+ return __pud_alloc_one(mm, addr);
+
+ return NULL;
+}
+
+#define pud_free pud_free
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (pgtable_l4_enabled)
+ __pud_free(mm, pud);
+}
+
+#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
+
+#define p4d_alloc_one p4d_alloc_one
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ if (pgtable_l5_enabled) {
+ gfp_t gfp = GFP_PGTABLE_USER;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ return (p4d_t *)get_zeroed_page(gfp);
+ }
+
+ return NULL;
+}
+
+static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+ free_page((unsigned long)p4d);
+}
+
+#define p4d_free p4d_free
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ if (pgtable_l5_enabled)
+ __p4d_free(mm, p4d);
+}
+
+#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
+#endif /* __PAGETABLE_PMD_FOLDED */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
@@ -55,24 +142,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return pgd;
}
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- free_page((unsigned long)pgd);
-}
-
#ifndef __PAGETABLE_PMD_FOLDED
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- return (pmd_t *)__get_free_page(
- GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
- free_page((unsigned long)pmd);
-}
-
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
#endif /* __PAGETABLE_PMD_FOLDED */
diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h
index b0ab66e5fdb1..59ba1fbaf784 100644
--- a/arch/riscv/include/asm/pgtable-32.h
+++ b/arch/riscv/include/asm/pgtable-32.h
@@ -7,6 +7,7 @@
#define _ASM_RISCV_PGTABLE_32_H
#include <asm-generic/pgtable-nopmd.h>
+#include <linux/bits.h>
#include <linux/const.h>
/* Size of region mapped by a page global directory */
@@ -14,4 +15,22 @@
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 34
+
+/*
+ * rv32 PTE format:
+ * | XLEN-1 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
+ * PFN reserved for SW D A G U X W R V
+ */
+#define _PAGE_PFN_MASK GENMASK(31, 10)
+
+#define _PAGE_NOCACHE 0
+#define _PAGE_IO 0
+#define _PAGE_MTMASK 0
+
+/* Set of bits to preserve across pte_modify() */
+#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \
+ _PAGE_WRITE | _PAGE_EXEC | \
+ _PAGE_USER | _PAGE_GLOBAL))
+
#endif /* _ASM_RISCV_PGTABLE_32_H */
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index f3b0da64c6c8..5c2aba5efbd0 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -6,18 +6,57 @@
#ifndef _ASM_RISCV_PGTABLE_64_H
#define _ASM_RISCV_PGTABLE_64_H
+#include <linux/bits.h>
#include <linux/const.h>
+#include <asm/errata_list.h>
-#define PGDIR_SHIFT 30
+extern bool pgtable_l4_enabled;
+extern bool pgtable_l5_enabled;
+
+#define PGDIR_SHIFT_L3 30
+#define PGDIR_SHIFT_L4 39
+#define PGDIR_SHIFT_L5 48
+#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
+
+#define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
+ (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+/* p4d is folded into pgd in case of 4-level page table */
+#define P4D_SHIFT 39
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE - 1))
+
+/* pud is folded into pgd in case of 3-level page table */
+#define PUD_SHIFT 30
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+
#define PMD_SHIFT 21
/* Size of region mapped by a page middle directory */
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
+/* Page 4th Directory entry */
+typedef struct {
+ unsigned long p4d;
+} p4d_t;
+
+#define p4d_val(x) ((x).p4d)
+#define __p4d(x) ((p4d_t) { (x) })
+#define PTRS_PER_P4D (PAGE_SIZE / sizeof(p4d_t))
+
+/* Page Upper Directory entry */
+typedef struct {
+ unsigned long pud;
+} pud_t;
+
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) })
+#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))
+
/* Page Middle Directory entry */
typedef struct {
unsigned long pmd;
@@ -28,6 +67,71 @@ typedef struct {
#define PTRS_PER_PMD (PAGE_SIZE / sizeof(pmd_t))
+/*
+ * rv64 PTE format:
+ * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
+ * N MT RSV PFN reserved for SW D A G U X W R V
+ */
+#define _PAGE_PFN_MASK GENMASK(53, 10)
+
+/*
+ * [62:61] Svpbmt Memory Type definitions:
+ *
+ * 00 - PMA Normal Cacheable, No change to implied PMA memory type
+ * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory
+ * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory
+ * 11 - Rsvd Reserved for future standard use
+ */
+#define _PAGE_NOCACHE_SVPBMT (1UL << 61)
+#define _PAGE_IO_SVPBMT (1UL << 62)
+#define _PAGE_MTMASK_SVPBMT (_PAGE_NOCACHE_SVPBMT | _PAGE_IO_SVPBMT)
+
+/*
+ * [63:59] T-Head Memory Type definitions:
+ *
+ * 00000 - NC Weakly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable
+ * 01110 - PMA Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable
+ * 10000 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable
+ */
+#define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60))
+#define _PAGE_NOCACHE_THEAD 0UL
+#define _PAGE_IO_THEAD (1UL << 63)
+#define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59))
+
+static inline u64 riscv_page_mtmask(void)
+{
+ u64 val;
+
+ ALT_SVPBMT(val, _PAGE_MTMASK);
+ return val;
+}
+
+static inline u64 riscv_page_nocache(void)
+{
+ u64 val;
+
+ ALT_SVPBMT(val, _PAGE_NOCACHE);
+ return val;
+}
+
+static inline u64 riscv_page_io(void)
+{
+ u64 val;
+
+ ALT_SVPBMT(val, _PAGE_IO);
+ return val;
+}
+
+#define _PAGE_NOCACHE riscv_page_nocache()
+#define _PAGE_IO riscv_page_io()
+#define _PAGE_MTMASK riscv_page_mtmask()
+
+/* Set of bits to preserve across pte_modify() */
+#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \
+ _PAGE_WRITE | _PAGE_EXEC | \
+ _PAGE_USER | _PAGE_GLOBAL | \
+ _PAGE_MTMASK))
+
static inline int pud_present(pud_t pud)
{
return (pud_val(pud) & _PAGE_PRESENT);
@@ -46,8 +150,12 @@ static inline int pud_bad(pud_t pud)
#define pud_leaf pud_leaf
static inline int pud_leaf(pud_t pud)
{
- return pud_present(pud) &&
- (pud_val(pud) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+ return pud_present(pud) && (pud_val(pud) & _PAGE_LEAF);
+}
+
+static inline int pud_user(pud_t pud)
+{
+ return pud_val(pud) & _PAGE_USER;
}
static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -60,27 +168,206 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
-static inline unsigned long pud_page_vaddr(pud_t pud)
+static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
+{
+ return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _pud_pfn(pud_t pud)
{
- return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
+ return pud_val(pud) >> _PAGE_PFN_SHIFT;
+}
+
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+ return (pmd_t *)pfn_to_virt(__page_val_to_pfn(pud_val(pud)));
}
static inline struct page *pud_page(pud_t pud)
{
- return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
+ return pfn_to_page(__page_val_to_pfn(pud_val(pud)));
+}
+
+#define mm_p4d_folded mm_p4d_folded
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+ if (pgtable_l5_enabled)
+ return false;
+
+ return true;
+}
+
+#define mm_pud_folded mm_pud_folded
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ if (pgtable_l4_enabled)
+ return false;
+
+ return true;
}
+#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
{
- return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+ unsigned long prot_val = pgprot_val(prot);
+
+ ALT_THEAD_PMA(prot_val);
+
+ return __pmd((pfn << _PAGE_PFN_SHIFT) | prot_val);
}
static inline unsigned long _pmd_pfn(pmd_t pmd)
{
- return pmd_val(pmd) >> _PAGE_PFN_SHIFT;
+ return __page_val_to_pfn(pmd_val(pmd));
}
+#define mk_pmd(page, prot) pfn_pmd(page_to_pfn(page), prot)
+
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+
+#define p4d_ERROR(e) \
+ pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ *p4dp = p4d;
+ else
+ set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) == 0);
+
+ return 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) & _PAGE_PRESENT);
+
+ return 1;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return !p4d_present(p4d);
+
+ return 0;
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+ if (pgtable_l4_enabled)
+ set_p4d(p4d, __p4d(0));
+}
+
+static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
+{
+ return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _p4d_pfn(p4d_t p4d)
+{
+ return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
+}
+
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+
+ return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
+}
+#define p4d_page_vaddr(p4d) ((unsigned long)p4d_pgtable(p4d))
+
+static inline struct page *p4d_page(p4d_t p4d)
+{
+ return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+}
+
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+#define pud_offset pud_offset
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if (pgtable_l4_enabled)
+ return p4d_pgtable(*p4d) + pud_index(address);
+
+ return (pud_t *)p4d;
+}
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ *pgdp = pgd;
+ else
+ set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return (pgd_val(pgd) == 0);
+
+ return 0;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return (pgd_val(pgd) & _PAGE_PRESENT);
+
+ return 1;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return !pgd_present(pgd);
+
+ return 0;
+}
+
+static inline void pgd_clear(pgd_t *pgd)
+{
+ if (pgtable_l5_enabled)
+ set_pgd(pgd, __pgd(0));
+}
+
+static inline p4d_t *pgd_pgtable(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+
+ return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
+}
+#define pgd_page_vaddr(pgd) ((unsigned long)pgd_pgtable(pgd))
+
+static inline struct page *pgd_page(pgd_t pgd)
+{
+ return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+}
+#define pgd_page(pgd) pgd_page(pgd)
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+#define p4d_offset p4d_offset
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+ if (pgtable_l5_enabled)
+ return pgd_pgtable(*pgd) + p4d_index(address);
+
+ return (p4d_t *)pgd;
+}
+
#endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index bbaeb5d35842..b9e13a8fe2b7 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -6,12 +6,6 @@
#ifndef _ASM_RISCV_PGTABLE_BITS_H
#define _ASM_RISCV_PGTABLE_BITS_H
-/*
- * PTE format:
- * | XLEN-1 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
- * PFN reserved for SW D A G U X W R V
- */
-
#define _PAGE_ACCESSED_OFFSET 6
#define _PAGE_PRESENT (1 << 0)
@@ -31,13 +25,14 @@
* _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to
* distinguish them from swapped out pages
*/
-#define _PAGE_PROT_NONE _PAGE_READ
+#define _PAGE_PROT_NONE _PAGE_GLOBAL
#define _PAGE_PFN_SHIFT 10
-/* Set of bits to preserve across pte_modify() */
-#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \
- _PAGE_WRITE | _PAGE_EXEC | \
- _PAGE_USER | _PAGE_GLOBAL))
+/*
+ * when all of R/W/X are zero, the PTE is a pointer to the next level
+ * of the page table; otherwise, it is a leaf PTE.
+ */
+#define _PAGE_LEAF (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
#endif /* _ASM_RISCV_PGTABLE_BITS_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index eaea1f717010..1d1be9d9419c 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -11,33 +11,68 @@
#include <asm/pgtable-bits.h>
-#ifndef __ASSEMBLY__
+#ifndef CONFIG_MMU
+#define KERNEL_LINK_ADDR PAGE_OFFSET
+#define KERN_VIRT_SIZE (UL(-1))
+#else
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-#include <linux/mm_types.h>
+#define ADDRESS_SPACE_END (UL(-1))
-#ifdef CONFIG_MMU
+#ifdef CONFIG_64BIT
+/* Leave 2GB for kernel and BPF at the end of the address space */
+#define KERNEL_LINK_ADDR (ADDRESS_SPACE_END - SZ_2G + 1)
+#else
+#define KERNEL_LINK_ADDR PAGE_OFFSET
+#endif
+
+/* Number of entries in the page global directory */
+#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
+/* Number of entries in the page table */
+#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+
+/*
+ * Half of the kernel address space (half of the entries of the page global
+ * directory) is for the direct mapping.
+ */
+#define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END (PAGE_OFFSET - 1)
+#define VMALLOC_END PAGE_OFFSET
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
#define BPF_JIT_REGION_SIZE (SZ_128M)
+#ifdef CONFIG_64BIT
+#define BPF_JIT_REGION_START (BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END (MODULES_END)
+#else
#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END (VMALLOC_END)
+#endif
+
+/* Modules always live before the kernel */
+#ifdef CONFIG_64BIT
+/* This is used to define the end of the KASAN shadow region */
+#define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G)
+#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
+#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
+#endif
/*
* Roughly size the vmemmap space to be large enough to fit enough
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
+#ifdef CONFIG_64BIT
+#define VA_BITS (pgtable_l5_enabled ? \
+ 57 : (pgtable_l4_enabled ? 48 : 39))
+#else
+#define VA_BITS 32
+#endif
+
#define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
-#define VMEMMAP_END (VMALLOC_START - 1)
+#define VMEMMAP_END VMALLOC_START
#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
/*
@@ -60,25 +95,64 @@
#endif
+#ifdef CONFIG_XIP_KERNEL
+#define XIP_OFFSET SZ_32M
+#define XIP_OFFSET_MASK (SZ_32M - 1)
+#else
+#define XIP_OFFSET 0
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <linux/mm_types.h>
+
+#define __page_val_to_pfn(_val) (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT)
+
#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>
#else
#include <asm/pgtable-32.h>
#endif /* CONFIG_64BIT */
-#ifdef CONFIG_MMU
-/* Number of entries in the page global directory */
-#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
-/* Number of entries in the page table */
-#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+#include <linux/page_table_check.h>
+
+#ifdef CONFIG_XIP_KERNEL
+#define XIP_FIXUP(addr) ({ \
+ uintptr_t __a = (uintptr_t)(addr); \
+ (__a >= CONFIG_XIP_PHYS_ADDR && \
+ __a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ? \
+ __a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\
+ __a; \
+ })
+#else
+#define XIP_FIXUP(addr) (addr)
+#endif /* CONFIG_XIP_KERNEL */
+
+struct pt_alloc_ops {
+ pte_t *(*get_pte_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pmd)(uintptr_t va);
+ pud_t *(*get_pud_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pud)(uintptr_t va);
+ p4d_t *(*get_p4d_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_p4d)(uintptr_t va);
+#endif
+};
+extern struct pt_alloc_ops pt_ops __initdata;
+
+#ifdef CONFIG_MMU
/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
/* Page protection bits */
#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)
-#define PAGE_NONE __pgprot(_PAGE_PROT_NONE)
+#define PAGE_NONE __pgprot(_PAGE_PROT_NONE | _PAGE_READ)
#define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ)
#define PAGE_WRITE __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE)
#define PAGE_EXEC __pgprot(_PAGE_BASE | _PAGE_EXEC)
@@ -96,18 +170,19 @@
| _PAGE_WRITE \
| _PAGE_PRESENT \
| _PAGE_ACCESSED \
- | _PAGE_DIRTY)
+ | _PAGE_DIRTY \
+ | _PAGE_GLOBAL)
#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC)
+#define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \
+ | _PAGE_EXEC)
#define PAGE_TABLE __pgprot(_PAGE_TABLE)
-/*
- * The RISC-V ISA doesn't yet specify how to query or modify PMAs, so we can't
- * change the properties of memory regions.
- */
-#define _PAGE_IOREMAP _PAGE_KERNEL
+#define _PAGE_IOREMAP ((_PAGE_KERNEL & ~_PAGE_MTMASK) | _PAGE_IO)
+#define PAGE_KERNEL_IO __pgprot(_PAGE_IOREMAP)
extern pgd_t swapper_pg_dir[];
@@ -131,10 +206,23 @@ extern pgd_t swapper_pg_dir[];
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_present(pmd_t pmd)
+{
+ /*
+ * Checking for _PAGE_LEAF is needed too because:
+ * When splitting a THP, split_huge_page() will temporarily clear
+ * the present bit, in this situation, pmd_present() and
+ * pmd_trans_huge() still needs to return true.
+ */
+ return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE | _PAGE_LEAF));
+}
+#else
static inline int pmd_present(pmd_t pmd)
{
return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
}
+#endif
static inline int pmd_none(pmd_t pmd)
{
@@ -143,14 +231,13 @@ static inline int pmd_none(pmd_t pmd)
static inline int pmd_bad(pmd_t pmd)
{
- return !pmd_present(pmd);
+ return !pmd_present(pmd) || (pmd_val(pmd) & _PAGE_LEAF);
}
#define pmd_leaf pmd_leaf
static inline int pmd_leaf(pmd_t pmd)
{
- return pmd_present(pmd) &&
- (pmd_val(pmd) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+ return pmd_present(pmd) && (pmd_val(pmd) & _PAGE_LEAF);
}
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
@@ -165,7 +252,11 @@ static inline void pmd_clear(pmd_t *pmdp)
static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
{
- return __pgd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+ unsigned long prot_val = pgprot_val(prot);
+
+ ALT_THEAD_PMA(prot_val);
+
+ return __pgd((pfn << _PAGE_PFN_SHIFT) | prot_val);
}
static inline unsigned long _pgd_pfn(pgd_t pgd)
@@ -175,18 +266,28 @@ static inline unsigned long _pgd_pfn(pgd_t pgd)
static inline struct page *pmd_page(pmd_t pmd)
{
- return pfn_to_page(pmd_val(pmd) >> _PAGE_PFN_SHIFT);
+ return pfn_to_page(__page_val_to_pfn(pmd_val(pmd)));
}
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
- return (unsigned long)pfn_to_virt(pmd_val(pmd) >> _PAGE_PFN_SHIFT);
+ return (unsigned long)pfn_to_virt(__page_val_to_pfn(pmd_val(pmd)));
+}
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+ return __pte(pmd_val(pmd));
+}
+
+static inline pte_t pud_pte(pud_t pud)
+{
+ return __pte(pud_val(pud));
}
/* Yields the page frame number (PFN) of a page table entry */
static inline unsigned long pte_pfn(pte_t pte)
{
- return (pte_val(pte) >> _PAGE_PFN_SHIFT);
+ return __page_val_to_pfn(pte_val(pte));
}
#define pte_page(x) pfn_to_page(pte_pfn(x))
@@ -194,7 +295,11 @@ static inline unsigned long pte_pfn(pte_t pte)
/* Constructs a page table entry */
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
{
- return __pte((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+ unsigned long prot_val = pgprot_val(prot);
+
+ ALT_THEAD_PMA(prot_val);
+
+ return __pte((pfn << _PAGE_PFN_SHIFT) | prot_val);
}
#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
@@ -219,10 +324,14 @@ static inline int pte_exec(pte_t pte)
return pte_val(pte) & _PAGE_EXEC;
}
+static inline int pte_user(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_USER;
+}
+
static inline int pte_huge(pte_t pte)
{
- return pte_present(pte)
- && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+ return pte_present(pte) && (pte_val(pte) & _PAGE_LEAF);
}
static inline int pte_dirty(pte_t pte)
@@ -286,10 +395,29 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
}
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * See the comment in include/asm-generic/pgtable.h
+ */
+static inline int pte_protnone(pte_t pte)
+{
+ return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) == _PAGE_PROT_NONE;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+ return pte_protnone(pmd_pte(pmd));
+}
+#endif
+
/* Modify page protection bits */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+ unsigned long newprot_val = pgprot_val(newprot);
+
+ ALT_THEAD_PMA(newprot_val);
+
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) | newprot_val);
}
#define pgd_ERROR(e) \
@@ -310,6 +438,14 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
local_flush_tlb_page(address);
}
+static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ pte_t *ptep = (pte_t *)pmdp;
+
+ update_mmu_cache(vma, address, ptep);
+}
+
#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
@@ -328,7 +464,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
void flush_icache_pte(pte_t pte);
-static inline void set_pte_at(struct mm_struct *mm,
+static inline void __set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
if (pte_present(pteval) && pte_exec(pteval))
@@ -337,10 +473,17 @@ static inline void set_pte_at(struct mm_struct *mm,
set_pte(ptep, pteval);
}
+static inline void set_pte_at(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pteval)
+{
+ page_table_check_pte_set(mm, addr, ptep, pteval);
+ __set_pte_at(mm, addr, ptep, pteval);
+}
+
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- set_pte_at(mm, addr, ptep, __pte(0));
+ __set_pte_at(mm, addr, ptep, __pte(0));
}
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
@@ -361,7 +504,11 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
- return __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));
+ pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));
+
+ page_table_check_pte_clear(mm, address, pte);
+
+ return pte;
}
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
@@ -403,16 +550,206 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, ptep);
}
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t _prot)
+{
+ unsigned long prot = pgprot_val(_prot);
+
+ prot &= ~_PAGE_MTMASK;
+ prot |= _PAGE_IO;
+
+ return __pgprot(prot);
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
+{
+ unsigned long prot = pgprot_val(_prot);
+
+ prot &= ~_PAGE_MTMASK;
+ prot |= _PAGE_NOCACHE;
+
+ return __pgprot(prot);
+}
+
+/*
+ * THP functions
+ */
+static inline pmd_t pte_pmd(pte_t pte)
+{
+ return __pmd(pte_val(pte));
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ return pmd;
+}
+
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
+}
+
+#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
+}
+
+#define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT);
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
+}
+
+#define pmd_write pmd_write
+static inline int pmd_write(pmd_t pmd)
+{
+ return pte_write(pmd_pte(pmd));
+}
+
+static inline int pmd_dirty(pmd_t pmd)
+{
+ return pte_dirty(pmd_pte(pmd));
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+ return pte_young(pmd_pte(pmd));
+}
+
+static inline int pmd_user(pmd_t pmd)
+{
+ return pte_user(pmd_pte(pmd));
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+ return pte_pmd(pte_mkold(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ return pte_pmd(pte_mkwrite(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ return pte_pmd(pte_wrprotect(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+ return pte_pmd(pte_mkclean(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ return pte_pmd(pte_mkdirty(pmd_pte(pmd)));
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ page_table_check_pmd_set(mm, addr, pmdp, pmd);
+ return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
+}
+
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(mm, addr, pudp, pud);
+ return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
+}
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return pte_present(pte) && pte_user(pte);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_leaf(pmd) && pmd_user(pmd);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_leaf(pud) && pud_user(pud);
+}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_leaf(pmd);
+}
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty)
+{
+ return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0));
+
+ page_table_check_pmd_clear(mm, address, pmd);
+
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
+}
+
+#define pmdp_establish pmdp_establish
+static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp, pmd_t pmd)
+{
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
+ return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
/*
* Encode and decode a swap entry
*
* Format of swap PTE:
* bit 0: _PAGE_PRESENT (zero)
- * bit 1: _PAGE_PROT_NONE (zero)
- * bits 2 to 6: swap type
- * bits 7 to XLEN-1: swap offset
+ * bit 1 to 3: _PAGE_LEAF (zero)
+ * bit 5: _PAGE_PROT_NONE (zero)
+ * bits 6 to 10: swap type
+ * bits 10 to XLEN-1: swap offset
*/
-#define __SWP_TYPE_SHIFT 2
+#define __SWP_TYPE_SHIFT 6
#define __SWP_TYPE_BITS 5
#define __SWP_TYPE_MASK ((1UL << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -428,12 +765,17 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
+#define __swp_entry_to_pmd(swp) __pmd((swp).val)
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
/*
* In the RV64 Linux scheme, we give the user half of the virtual-address space
* and give the kernel the other (upper) half.
*/
#ifdef CONFIG_64BIT
-#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE)
+#define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE)
#else
#define KERN_VIRT_START FIXADDR_START
#endif
@@ -441,11 +783,31 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
/*
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
+ * Task size is:
+ * - 0x9fc00000 (~2.5GB) for RV32.
+ * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
+ * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
+ *
+ * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
+ * Instruction Set Manual Volume II: Privileged Architecture" states that
+ * "load and store effective addresses, which are 64bits, must have bits
+ * 63–48 all equal to bit 47, or else a page-fault exception will occur."
*/
#ifdef CONFIG_64BIT
-#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
+
+#ifdef CONFIG_COMPAT
+#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE)
+#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
+ TASK_SIZE_32 : TASK_SIZE_64)
+#else
+#define TASK_SIZE TASK_SIZE_64
+#endif
+
#else
-#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE_MIN TASK_SIZE
#endif
#else /* CONFIG_MMU */
@@ -457,17 +819,25 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define VMALLOC_START 0
#define VMALLOC_END TASK_SIZE
-static inline void __kernel_map_pages(struct page *page, int numpages, int enable) {}
-
#endif /* !CONFIG_MMU */
#define kern_addr_valid(addr) (1) /* FIXME */
-extern void *dtb_early_va;
-void setup_bootmem(void);
-void paging_init(void);
+extern char _start[];
+extern void *_dtb_early_va;
+extern uintptr_t _dtb_early_pa;
+#if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_MMU)
+#define dtb_early_va (*(void **)XIP_FIXUP(&_dtb_early_va))
+#define dtb_early_pa (*(uintptr_t *)XIP_FIXUP(&_dtb_early_pa))
+#else
+#define dtb_early_va _dtb_early_va
+#define dtb_early_pa _dtb_early_pa
+#endif /* CONFIG_XIP_KERNEL */
+extern u64 satp_mode;
+extern bool pgtable_l4_enabled;
-#define FIRST_USER_ADDRESS 0
+void paging_init(void);
+void misc_mem_init(void);
/*
* ZERO_PAGE is a global shared page that is always zero,
diff --git a/arch/riscv/include/asm/probes.h b/arch/riscv/include/asm/probes.h
new file mode 100644
index 000000000000..a787e6d537b9
--- /dev/null
+++ b/arch/riscv/include/asm/probes.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_PROBES_H
+#define _ASM_RISCV_PROBES_H
+
+typedef u32 probe_opcode_t;
+typedef bool (probes_handler_t) (u32 opcode, unsigned long addr, struct pt_regs *);
+
+/* architecture specific copy of original instruction */
+struct arch_probe_insn {
+ probe_opcode_t *insn;
+ probes_handler_t *handler;
+ /* restore address after simulation */
+ unsigned long restore;
+};
+
+#ifdef CONFIG_KPROBES
+typedef u32 kprobe_opcode_t;
+struct arch_specific_insn {
+ struct arch_probe_insn api;
+};
+#endif
+
+#endif /* _ASM_RISCV_PROBES_H */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index bdddcd5c1b71..21c8072dce17 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -19,7 +19,11 @@
#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX STACK_TOP
+#ifdef CONFIG_64BIT
+#define STACK_TOP_MAX TASK_SIZE_64
+#else
+#define STACK_TOP_MAX TASK_SIZE
+#endif
#define STACK_ALIGN 16
#ifndef __ASSEMBLY__
@@ -34,8 +38,17 @@ struct thread_struct {
unsigned long sp; /* Kernel mode stack */
unsigned long s[12]; /* s[0]: frame pointer */
struct __riscv_d_ext_state fstate;
+ unsigned long bad_cause;
};
+/* Whitelist the fstate from the task_struct for hardened usercopy */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ *offset = offsetof(struct thread_struct, fstate);
+ *size = sizeof_field(struct thread_struct, fstate);
+}
+
#define INIT_THREAD { \
.sp = sizeof(init_stack) + (long)&init_stack, \
}
@@ -57,7 +70,7 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
static inline void wait_for_interrupt(void)
@@ -70,6 +83,7 @@ int riscv_of_processor_hartid(struct device_node *node);
int riscv_of_parent_hartid(struct device_node *node);
extern void riscv_fill_hwcap(void);
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index ee49f80c9533..6ecd461129d2 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -8,6 +8,7 @@
#include <uapi/asm/ptrace.h>
#include <asm/csr.h>
+#include <linux/compiler.h>
#ifndef __ASSEMBLY__
@@ -60,6 +61,7 @@ struct pt_regs {
#define user_mode(regs) (((regs)->status & SR_PP) == 0)
+#define MAX_REG_OFFSET offsetof(struct pt_regs, orig_a0)
/* Helpers for working with the instruction pointer */
static inline unsigned long instruction_pointer(struct pt_regs *regs)
@@ -85,6 +87,12 @@ static inline void user_stack_pointer_set(struct pt_regs *regs,
regs->sp = val;
}
+/* Valid only for Kernel mode traps. */
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+ return regs->sp;
+}
+
/* Helpers for working with the frame pointer */
static inline unsigned long frame_pointer(struct pt_regs *regs)
{
@@ -101,6 +109,69 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->a0;
}
+static inline void regs_set_return_value(struct pt_regs *regs,
+ unsigned long val)
+{
+ regs->a0 = val;
+}
+
+extern int regs_query_register_offset(const char *name);
+extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+ unsigned int n);
+
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer);
+int do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_exit(struct pt_regs *regs);
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs: pt_regs from which register value is gotten
+ * @offset: offset of the register.
+ *
+ * regs_get_register returns the value of a register whose offset from @regs.
+ * The @offset is the offset of the register in struct pt_regs.
+ * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+ unsigned int offset)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return 0;
+
+ return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+/**
+ * regs_get_kernel_argument() - get Nth function argument in kernel
+ * @regs: pt_regs of that context
+ * @n: function argument number (start from 0)
+ *
+ * regs_get_argument() returns @n th argument of the function call.
+ *
+ * Note you can get the parameter correctly if the function has no
+ * more than eight arguments.
+ */
+static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
+ unsigned int n)
+{
+ static const int nr_reg_arguments = 8;
+ static const unsigned int argument_offs[] = {
+ offsetof(struct pt_regs, a0),
+ offsetof(struct pt_regs, a1),
+ offsetof(struct pt_regs, a2),
+ offsetof(struct pt_regs, a3),
+ offsetof(struct pt_regs, a4),
+ offsetof(struct pt_regs, a5),
+ offsetof(struct pt_regs, a6),
+ offsetof(struct pt_regs, a7),
+ };
+
+ if (n < nr_reg_arguments)
+ return regs_get_register(regs, argument_offs[n]);
+ return 0;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_PTRACE_H */
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 653edb25d495..9e3c2cf1edaf 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -8,6 +8,7 @@
#define _ASM_RISCV_SBI_H
#include <linux/types.h>
+#include <linux/cpumask.h>
#ifdef CONFIG_RISCV_SBI
enum sbi_ext_id {
@@ -27,6 +28,16 @@ enum sbi_ext_id {
SBI_EXT_IPI = 0x735049,
SBI_EXT_RFENCE = 0x52464E43,
SBI_EXT_HSM = 0x48534D,
+ SBI_EXT_SRST = 0x53525354,
+ SBI_EXT_PMU = 0x504D55,
+
+ /* Experimentals extensions must lie within this range */
+ SBI_EXT_EXPERIMENTAL_START = 0x08000000,
+ SBI_EXT_EXPERIMENTAL_END = 0x08FFFFFF,
+
+ /* Vendor extensions must lie within this range */
+ SBI_EXT_VENDOR_START = 0x09000000,
+ SBI_EXT_VENDOR_END = 0x09FFFFFF,
};
enum sbi_ext_base_fid {
@@ -51,25 +62,149 @@ enum sbi_ext_rfence_fid {
SBI_EXT_RFENCE_REMOTE_FENCE_I = 0,
SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
- SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
- SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
+ SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
+ SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
};
enum sbi_ext_hsm_fid {
SBI_EXT_HSM_HART_START = 0,
SBI_EXT_HSM_HART_STOP,
SBI_EXT_HSM_HART_STATUS,
+ SBI_EXT_HSM_HART_SUSPEND,
+};
+
+enum sbi_hsm_hart_state {
+ SBI_HSM_STATE_STARTED = 0,
+ SBI_HSM_STATE_STOPPED,
+ SBI_HSM_STATE_START_PENDING,
+ SBI_HSM_STATE_STOP_PENDING,
+ SBI_HSM_STATE_SUSPENDED,
+ SBI_HSM_STATE_SUSPEND_PENDING,
+ SBI_HSM_STATE_RESUME_PENDING,
+};
+
+#define SBI_HSM_SUSP_BASE_MASK 0x7fffffff
+#define SBI_HSM_SUSP_NON_RET_BIT 0x80000000
+#define SBI_HSM_SUSP_PLAT_BASE 0x10000000
+
+#define SBI_HSM_SUSPEND_RET_DEFAULT 0x00000000
+#define SBI_HSM_SUSPEND_RET_PLATFORM SBI_HSM_SUSP_PLAT_BASE
+#define SBI_HSM_SUSPEND_RET_LAST SBI_HSM_SUSP_BASE_MASK
+#define SBI_HSM_SUSPEND_NON_RET_DEFAULT SBI_HSM_SUSP_NON_RET_BIT
+#define SBI_HSM_SUSPEND_NON_RET_PLATFORM (SBI_HSM_SUSP_NON_RET_BIT | \
+ SBI_HSM_SUSP_PLAT_BASE)
+#define SBI_HSM_SUSPEND_NON_RET_LAST (SBI_HSM_SUSP_NON_RET_BIT | \
+ SBI_HSM_SUSP_BASE_MASK)
+
+enum sbi_ext_srst_fid {
+ SBI_EXT_SRST_RESET = 0,
+};
+
+enum sbi_srst_reset_type {
+ SBI_SRST_RESET_TYPE_SHUTDOWN = 0,
+ SBI_SRST_RESET_TYPE_COLD_REBOOT,
+ SBI_SRST_RESET_TYPE_WARM_REBOOT,
+};
+
+enum sbi_srst_reset_reason {
+ SBI_SRST_RESET_REASON_NONE = 0,
+ SBI_SRST_RESET_REASON_SYS_FAILURE,
+};
+
+enum sbi_ext_pmu_fid {
+ SBI_EXT_PMU_NUM_COUNTERS = 0,
+ SBI_EXT_PMU_COUNTER_GET_INFO,
+ SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ SBI_EXT_PMU_COUNTER_START,
+ SBI_EXT_PMU_COUNTER_STOP,
+ SBI_EXT_PMU_COUNTER_FW_READ,
};
-enum sbi_hsm_hart_status {
- SBI_HSM_HART_STATUS_STARTED = 0,
- SBI_HSM_HART_STATUS_STOPPED,
- SBI_HSM_HART_STATUS_START_PENDING,
- SBI_HSM_HART_STATUS_STOP_PENDING,
+#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(55, 0)
+#define RISCV_PMU_RAW_EVENT_IDX 0x20000
+
+/** General pmu event codes specified in SBI PMU extension */
+enum sbi_pmu_hw_generic_events_t {
+ SBI_PMU_HW_NO_EVENT = 0,
+ SBI_PMU_HW_CPU_CYCLES = 1,
+ SBI_PMU_HW_INSTRUCTIONS = 2,
+ SBI_PMU_HW_CACHE_REFERENCES = 3,
+ SBI_PMU_HW_CACHE_MISSES = 4,
+ SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
+ SBI_PMU_HW_BRANCH_MISSES = 6,
+ SBI_PMU_HW_BUS_CYCLES = 7,
+ SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
+ SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
+ SBI_PMU_HW_REF_CPU_CYCLES = 10,
+
+ SBI_PMU_HW_GENERAL_MAX,
};
+/**
+ * Special "firmware" events provided by the firmware, even if the hardware
+ * does not support performance events. These events are encoded as a raw
+ * event type in Linux kernel perf framework.
+ */
+enum sbi_pmu_fw_generic_events_t {
+ SBI_PMU_FW_MISALIGNED_LOAD = 0,
+ SBI_PMU_FW_MISALIGNED_STORE = 1,
+ SBI_PMU_FW_ACCESS_LOAD = 2,
+ SBI_PMU_FW_ACCESS_STORE = 3,
+ SBI_PMU_FW_ILLEGAL_INSN = 4,
+ SBI_PMU_FW_SET_TIMER = 5,
+ SBI_PMU_FW_IPI_SENT = 6,
+ SBI_PMU_FW_IPI_RECVD = 7,
+ SBI_PMU_FW_FENCE_I_SENT = 8,
+ SBI_PMU_FW_FENCE_I_RECVD = 9,
+ SBI_PMU_FW_SFENCE_VMA_SENT = 10,
+ SBI_PMU_FW_SFENCE_VMA_RCVD = 11,
+ SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12,
+ SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13,
+
+ SBI_PMU_FW_HFENCE_GVMA_SENT = 14,
+ SBI_PMU_FW_HFENCE_GVMA_RCVD = 15,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17,
+
+ SBI_PMU_FW_HFENCE_VVMA_SENT = 18,
+ SBI_PMU_FW_HFENCE_VVMA_RCVD = 19,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21,
+ SBI_PMU_FW_MAX,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_event_type {
+ SBI_PMU_EVENT_TYPE_HW = 0x0,
+ SBI_PMU_EVENT_TYPE_CACHE = 0x1,
+ SBI_PMU_EVENT_TYPE_RAW = 0x2,
+ SBI_PMU_EVENT_TYPE_FW = 0xf,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_ctr_type {
+ SBI_PMU_CTR_TYPE_HW = 0x0,
+ SBI_PMU_CTR_TYPE_FW,
+};
+
+/* Flags defined for config matching function */
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1)
+#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3)
+#define SBI_PMU_CFG_FLAG_SET_VSNH (1 << 4)
+#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5)
+#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6)
+#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7)
+
+/* Flags defined for counter start function */
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0)
+
+/* Flags defined for counter stop function */
+#define SBI_PMU_STOP_FLAG_RESET (1 << 0)
+
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
@@ -82,6 +217,9 @@ enum sbi_hsm_hart_status {
#define SBI_ERR_INVALID_PARAM -3
#define SBI_ERR_DENIED -4
#define SBI_ERR_INVALID_ADDRESS -5
+#define SBI_ERR_ALREADY_AVAILABLE -6
+#define SBI_ERR_ALREADY_STARTED -7
+#define SBI_ERR_ALREADY_STOPPED -8
extern unsigned long sbi_spec_version;
struct sbiret {
@@ -89,7 +227,7 @@ struct sbiret {
long value;
};
-int sbi_init(void);
+void sbi_init(void);
struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
unsigned long arg1, unsigned long arg2,
unsigned long arg3, unsigned long arg4,
@@ -97,30 +235,33 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
void sbi_console_putchar(int ch);
int sbi_console_getchar(void);
+long sbi_get_mvendorid(void);
+long sbi_get_marchid(void);
+long sbi_get_mimpid(void);
void sbi_set_timer(uint64_t stime_value);
void sbi_shutdown(void);
void sbi_clear_ipi(void);
-void sbi_send_ipi(const unsigned long *hart_mask);
-void sbi_remote_fence_i(const unsigned long *hart_mask);
-void sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_send_ipi(const struct cpumask *cpu_mask);
+int sbi_remote_fence_i(const struct cpumask *cpu_mask);
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid);
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
@@ -145,13 +286,17 @@ static inline unsigned long sbi_minor_version(void)
return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
}
+/* Make SBI version */
+static inline unsigned long sbi_mk_version(unsigned long major,
+ unsigned long minor)
+{
+ return ((major & SBI_SPEC_VERSION_MAJOR_MASK) <<
+ SBI_SPEC_VERSION_MAJOR_SHIFT) | minor;
+}
+
int sbi_err_map_linux_errno(int err);
#else /* CONFIG_RISCV_SBI */
-/* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
-void sbi_set_timer(uint64_t stime_value);
-void sbi_clear_ipi(void);
-void sbi_send_ipi(const unsigned long *hart_mask);
-void sbi_remote_fence_i(const unsigned long *hart_mask);
-void sbi_init(void);
+static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
+static inline void sbi_init(void) {}
#endif /* CONFIG_RISCV_SBI */
#endif /* _ASM_RISCV_SBI_H */
diff --git a/arch/riscv/include/asm/seccomp.h b/arch/riscv/include/asm/seccomp.h
index bf7744ee3b3d..c7ee6a3507be 100644
--- a/arch/riscv/include/asm/seccomp.h
+++ b/arch/riscv/include/asm/seccomp.h
@@ -7,4 +7,14 @@
#include <asm-generic/seccomp.h>
+#ifdef CONFIG_64BIT
+# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_RISCV64
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "riscv64"
+#else /* !CONFIG_64BIT */
+# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_RISCV32
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "riscv32"
+#endif
+
#endif /* _ASM_SECCOMP_H */
diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
new file mode 100644
index 000000000000..32336e8a17cb
--- /dev/null
+++ b/arch/riscv/include/asm/sections.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef __ASM_SECTIONS_H
+#define __ASM_SECTIONS_H
+
+#include <asm-generic/sections.h>
+#include <linux/mm.h>
+
+extern char _start[];
+extern char _start_kernel[];
+extern char __init_data_begin[], __init_data_end[];
+extern char __init_text_begin[], __init_text_end[];
+extern char __alt_start[], __alt_end[];
+
+static inline bool is_va_kernel_text(uintptr_t va)
+{
+ uintptr_t start = (uintptr_t)_start;
+ uintptr_t end = (uintptr_t)__init_data_begin;
+
+ return va >= start && va < end;
+}
+
+static inline bool is_va_kernel_lm_alias_text(uintptr_t va)
+{
+ uintptr_t start = (uintptr_t)lm_alias(_start);
+ uintptr_t end = (uintptr_t)lm_alias(__init_data_begin);
+
+ return va >= start && va < end;
+}
+
+#endif /* __ASM_SECTIONS_H */
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 4c5bae7ca01c..a2c14d4b3993 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -15,26 +15,45 @@ int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_rw_nx(unsigned long addr, int numpages);
+static __always_inline int set_kernel_memory(char *startp, char *endp,
+ int (*set_memory)(unsigned long start,
+ int num_pages))
+{
+ unsigned long start = (unsigned long)startp;
+ unsigned long end = (unsigned long)endp;
+ int num_pages = PAGE_ALIGN(end - start) >> PAGE_SHIFT;
+
+ return set_memory(start, num_pages);
+}
#else
static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; }
+static inline int set_kernel_memory(char *startp, char *endp,
+ int (*set_memory)(unsigned long start,
+ int num_pages))
+{
+ return 0;
+}
#endif
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
+bool kernel_page_present(struct page *page);
#endif /* __ASSEMBLY__ */
-#ifdef CONFIG_ARCH_HAS_STRICT_KERNEL_RWX
+#ifdef CONFIG_STRICT_KERNEL_RWX
#ifdef CONFIG_64BIT
#define SECTION_ALIGN (1 << 21)
#else
#define SECTION_ALIGN (1 << 22)
#endif
-#else /* !CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */
+#else /* !CONFIG_STRICT_KERNEL_RWX */
#define SECTION_ALIGN L1_CACHE_BYTES
-#endif /* CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */
+#endif /* CONFIG_STRICT_KERNEL_RWX */
#endif /* _ASM_RISCV_SET_MEMORY_H */
diff --git a/arch/riscv/include/asm/signal32.h b/arch/riscv/include/asm/signal32.h
new file mode 100644
index 000000000000..96dc56932e76
--- /dev/null
+++ b/arch/riscv/include/asm/signal32.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_SIGNAL32_H
+#define __ASM_SIGNAL32_H
+
+#if IS_ENABLED(CONFIG_COMPAT)
+int compat_setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs);
+#else
+static inline
+int compat_setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
+{
+ return -1;
+}
+#endif
+
+#endif
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index 40bb1c15a731..23170c933d73 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -15,6 +15,11 @@
struct seq_file;
extern unsigned long boot_cpu_hartid;
+struct riscv_ipi_ops {
+ void (*ipi_inject)(const struct cpumask *target);
+ void (*ipi_clear)(void);
+};
+
#ifdef CONFIG_SMP
/*
* Mapping between linux logical cpu index and hartid.
@@ -38,7 +43,15 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask);
void arch_send_call_function_single_ipi(int cpu);
int riscv_hartid_to_cpuid(int hartid);
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
+
+/* Set custom IPI operations */
+void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops);
+
+/* Clear IPI for current CPU */
+void riscv_clear_ipi(void);
+
+/* Secondary hart entry */
+asmlinkage void smp_callin(void);
/*
* Obtains the hart ID of the currently executing task. This relies on
@@ -49,8 +62,6 @@ void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
#if defined CONFIG_HOTPLUG_CPU
int __cpu_disable(void);
void __cpu_die(unsigned int cpu);
-void cpu_stop(void);
-#else
#endif /* CONFIG_HOTPLUG_CPU */
#else
@@ -71,11 +82,12 @@ static inline unsigned long cpuid_to_hartid_map(int cpu)
return boot_cpu_hartid;
}
-static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in,
- struct cpumask *out)
+static inline void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
+{
+}
+
+static inline void riscv_clear_ipi(void)
{
- cpumask_clear(out);
- cpumask_set_cpu(boot_cpu_hartid, out);
}
#endif /* CONFIG_SMP */
diff --git a/arch/riscv/include/asm/soc.h b/arch/riscv/include/asm/soc.h
index 136a442ef876..f494066051a2 100644
--- a/arch/riscv/include/asm/soc.h
+++ b/arch/riscv/include/asm/soc.h
@@ -13,7 +13,7 @@
#define SOC_EARLY_INIT_DECLARE(name, compat, fn) \
static const struct of_device_id __soc_early_init__##name \
- __used __section(__soc_early_init_table) \
+ __used __section("__soc_early_init_table") \
= { .compatible = compat, .data = fn }
void soc_early_init(void);
@@ -21,42 +21,4 @@ void soc_early_init(void);
extern unsigned long __soc_early_init_table_start;
extern unsigned long __soc_early_init_table_end;
-/*
- * Allows Linux to provide a device tree, which is necessary for SOCs that
- * don't provide a useful one on their own.
- */
-struct soc_builtin_dtb {
- unsigned long vendor_id;
- unsigned long arch_id;
- unsigned long imp_id;
- void *(*dtb_func)(void);
-};
-
-/*
- * The argument name must specify a valid DTS file name without the dts
- * extension.
- */
-#define SOC_BUILTIN_DTB_DECLARE(name, vendor, arch, impl) \
- extern void *__dtb_##name##_begin; \
- \
- static __init __used \
- void *__soc_builtin_dtb_f__##name(void) \
- { \
- return (void *)&__dtb_##name##_begin; \
- } \
- \
- static const struct soc_builtin_dtb __soc_builtin_dtb__##name \
- __used __section(__soc_builtin_dtb_table) = \
- { \
- .vendor_id = vendor, \
- .arch_id = arch, \
- .imp_id = impl, \
- .dtb_func = __soc_builtin_dtb_f__##name, \
- }
-
-extern unsigned long __soc_builtin_dtb_table_start;
-extern unsigned long __soc_builtin_dtb_table_end;
-
-void *soc_lookup_builtin_dtb(void);
-
#endif
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h
index 45a7018a8118..63acaecc3374 100644
--- a/arch/riscv/include/asm/sparsemem.h
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -4,7 +4,11 @@
#define _ASM_RISCV_SPARSEMEM_H
#ifdef CONFIG_SPARSEMEM
-#define MAX_PHYSMEM_BITS CONFIG_PA_BITS
+#ifdef CONFIG_64BIT
+#define MAX_PHYSMEM_BITS 56
+#else
+#define MAX_PHYSMEM_BITS 34
+#endif /* CONFIG_64BIT */
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
deleted file mode 100644
index f4f7fa1b7ca8..000000000000
--- a/arch/riscv/include/asm/spinlock.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 Regents of the University of California
- * Copyright (C) 2017 SiFive
- */
-
-#ifndef _ASM_RISCV_SPINLOCK_H
-#define _ASM_RISCV_SPINLOCK_H
-
-#include <linux/kernel.h>
-#include <asm/current.h>
-#include <asm/fence.h>
-
-/*
- * Simple spin lock operations. These provide no fairness guarantees.
- */
-
-/* FIXME: Replace this with a ticket lock, like MIPS. */
-
-#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
- smp_store_release(&lock->lock, 0);
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
- int tmp = 1, busy;
-
- __asm__ __volatile__ (
- " amoswap.w %0, %2, %1\n"
- RISCV_ACQUIRE_BARRIER
- : "=r" (busy), "+A" (lock->lock)
- : "r" (tmp)
- : "memory");
-
- return !busy;
-}
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
- while (1) {
- if (arch_spin_is_locked(lock))
- continue;
-
- if (arch_spin_trylock(lock))
- break;
- }
-}
-
-/***********************************************************/
-
-static inline void arch_read_lock(arch_rwlock_t *lock)
-{
- int tmp;
-
- __asm__ __volatile__(
- "1: lr.w %1, %0\n"
- " bltz %1, 1b\n"
- " addi %1, %1, 1\n"
- " sc.w %1, %1, %0\n"
- " bnez %1, 1b\n"
- RISCV_ACQUIRE_BARRIER
- : "+A" (lock->lock), "=&r" (tmp)
- :: "memory");
-}
-
-static inline void arch_write_lock(arch_rwlock_t *lock)
-{
- int tmp;
-
- __asm__ __volatile__(
- "1: lr.w %1, %0\n"
- " bnez %1, 1b\n"
- " li %1, -1\n"
- " sc.w %1, %1, %0\n"
- " bnez %1, 1b\n"
- RISCV_ACQUIRE_BARRIER
- : "+A" (lock->lock), "=&r" (tmp)
- :: "memory");
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *lock)
-{
- int busy;
-
- __asm__ __volatile__(
- "1: lr.w %1, %0\n"
- " bltz %1, 1f\n"
- " addi %1, %1, 1\n"
- " sc.w %1, %1, %0\n"
- " bnez %1, 1b\n"
- RISCV_ACQUIRE_BARRIER
- "1:\n"
- : "+A" (lock->lock), "=&r" (busy)
- :: "memory");
-
- return !busy;
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *lock)
-{
- int busy;
-
- __asm__ __volatile__(
- "1: lr.w %1, %0\n"
- " bnez %1, 1f\n"
- " li %1, -1\n"
- " sc.w %1, %1, %0\n"
- " bnez %1, 1b\n"
- RISCV_ACQUIRE_BARRIER
- "1:\n"
- : "+A" (lock->lock), "=&r" (busy)
- :: "memory");
-
- return !busy;
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *lock)
-{
- __asm__ __volatile__(
- RISCV_RELEASE_BARRIER
- " amoadd.w x0, %1, %0\n"
- : "+A" (lock->lock)
- : "r" (-1)
- : "memory");
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *lock)
-{
- smp_store_release(&lock->lock, 0);
-}
-
-#endif /* _ASM_RISCV_SPINLOCK_H */
diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h
deleted file mode 100644
index f398e7638dd6..000000000000
--- a/arch/riscv/include/asm/spinlock_types.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 Regents of the University of California
- */
-
-#ifndef _ASM_RISCV_SPINLOCK_TYPES_H
-#define _ASM_RISCV_SPINLOCK_TYPES_H
-
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
-typedef struct {
- volatile unsigned int lock;
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
-
-typedef struct {
- volatile unsigned int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED { 0 }
-
-#endif /* _ASM_RISCV_SPINLOCK_TYPES_H */
diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h
new file mode 100644
index 000000000000..09093af46565
--- /dev/null
+++ b/arch/riscv/include/asm/stackprotector.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_STACKPROTECTOR_H
+#define _ASM_RISCV_STACKPROTECTOR_H
+
+#include <linux/random.h>
+#include <linux/version.h>
+
+extern unsigned long __stack_chk_guard;
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+ unsigned long canary;
+
+ /* Try to get a semi random initial value. */
+ get_random_bytes(&canary, sizeof(canary));
+ canary ^= LINUX_VERSION_CODE;
+ canary &= CANARY_MASK;
+
+ current->stack_canary = canary;
+ if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
+ __stack_chk_guard = current->stack_canary;
+}
+#endif /* _ASM_RISCV_STACKPROTECTOR_H */
diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h
new file mode 100644
index 000000000000..3450c1912afd
--- /dev/null
+++ b/arch/riscv/include/asm/stacktrace.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_STACKTRACE_H
+#define _ASM_RISCV_STACKTRACE_H
+
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+
+struct stackframe {
+ unsigned long fp;
+ unsigned long ra;
+};
+
+extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
+ bool (*fn)(void *, unsigned long), void *arg);
+extern void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
+ const char *loglvl);
+
+#endif /* _ASM_RISCV_STACKTRACE_H */
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 924af13f8555..909049366555 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -12,16 +12,21 @@
#define __HAVE_ARCH_MEMSET
extern asmlinkage void *memset(void *, int, size_t);
extern asmlinkage void *__memset(void *, int, size_t);
-
#define __HAVE_ARCH_MEMCPY
extern asmlinkage void *memcpy(void *, const void *, size_t);
extern asmlinkage void *__memcpy(void *, const void *, size_t);
-
+#define __HAVE_ARCH_MEMMOVE
+extern asmlinkage void *memmove(void *, const void *, size_t);
+extern asmlinkage void *__memmove(void *, const void *, size_t);
/* For those files which don't want to check by kasan. */
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
-
#define memcpy(dst, src, len) __memcpy(dst, src, len)
#define memset(s, c, n) __memset(s, c, n)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
#endif
#endif /* _ASM_RISCV_STRING_H */
diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h
new file mode 100644
index 000000000000..8be391c2aecb
--- /dev/null
+++ b/arch/riscv/include/asm/suspend.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#ifndef _ASM_RISCV_SUSPEND_H
+#define _ASM_RISCV_SUSPEND_H
+
+#include <asm/ptrace.h>
+
+struct suspend_context {
+ /* Saved and restored by low-level functions */
+ struct pt_regs regs;
+ /* Saved and restored by high-level functions */
+ unsigned long scratch;
+ unsigned long tvec;
+ unsigned long ie;
+#ifdef CONFIG_MMU
+ unsigned long satp;
+#endif
+};
+
+/* Low-level CPU suspend entry function */
+int __cpu_suspend_enter(struct suspend_context *context);
+
+/* High-level CPU suspend which will save context and call finish() */
+int cpu_suspend(unsigned long arg,
+ int (*finish)(unsigned long arg,
+ unsigned long entry,
+ unsigned long context));
+
+/* Low-level CPU resume entry function */
+int __cpu_resume_enter(unsigned long hartid, unsigned long context);
+
+#endif
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 407bcc96a710..0a3f4f95c555 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -6,6 +6,7 @@
#ifndef _ASM_RISCV_SWITCH_TO_H
#define _ASM_RISCV_SWITCH_TO_H
+#include <linux/jump_label.h>
#include <linux/sched/task_stack.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
@@ -55,9 +56,13 @@ static inline void __switch_to_aux(struct task_struct *prev,
fstate_restore(next, task_pt_regs(next));
}
-extern bool has_fpu;
+extern struct static_key_false cpu_hwcap_fpu;
+static __always_inline bool has_fpu(void)
+{
+ return static_branch_likely(&cpu_hwcap_fpu);
+}
#else
-#define has_fpu false
+static __always_inline bool has_fpu(void) { return false; }
#define fstate_save(task, regs) do { } while (0)
#define fstate_restore(task, regs) do { } while (0)
#define __switch_to_aux(__prev, __next) do { } while (0)
@@ -70,7 +75,7 @@ extern struct task_struct *__switch_to(struct task_struct *,
do { \
struct task_struct *__prev = (prev); \
struct task_struct *__next = (next); \
- if (has_fpu) \
+ if (has_fpu()) \
__switch_to_aux(__prev, __next); \
((last) = __switch_to(__prev, __next)); \
} while (0)
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 49350c8bd7b0..384a63b86420 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -15,7 +15,8 @@
#include <linux/err.h>
/* The array of function pointers for syscalls. */
-extern void *sys_call_table[];
+extern void * const sys_call_table[];
+extern void * const compat_sys_call_table[];
/*
* Only the low 32 bits of orig_r0 are meaningful, so we return int.
@@ -64,15 +65,6 @@ static inline void syscall_get_arguments(struct task_struct *task,
memcpy(args, &regs->a1, 5 * sizeof(args[0]));
}
-static inline void syscall_set_arguments(struct task_struct *task,
- struct pt_regs *regs,
- const unsigned long *args)
-{
- regs->orig_a0 = args[0];
- args++;
- memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
-}
-
static inline int syscall_get_arch(struct task_struct *task)
{
#ifdef CONFIG_64BIT
@@ -82,4 +74,5 @@ static inline int syscall_get_arch(struct task_struct *task)
#endif
}
+asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
#endif /* _ASM_RISCV_SYSCALL_H */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 1dd12a0cbb2b..78933ac04995 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -11,19 +11,40 @@
#include <asm/page.h>
#include <linux/const.h>
+#ifdef CONFIG_KASAN
+#define KASAN_STACK_ORDER 1
+#else
+#define KASAN_STACK_ORDER 0
+#endif
+
/* thread information allocation */
-#define THREAD_SIZE_ORDER (1)
+#ifdef CONFIG_64BIT
+#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
+#else
+#define THREAD_SIZE_ORDER (1 + KASAN_STACK_ORDER)
+#endif
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+/*
+ * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
+ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
+ * assembly.
+ */
+#ifdef CONFIG_VMAP_STACK
+#define THREAD_ALIGN (2 * THREAD_SIZE)
+#else
+#define THREAD_ALIGN THREAD_SIZE
+#endif
+
+#define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER)
+#define OVERFLOW_STACK_SIZE SZ_4K
+#define SHADOW_OVERFLOW_STACK_SIZE (1024)
+
#ifndef __ASSEMBLY__
#include <asm/processor.h>
#include <asm/csr.h>
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
-
/*
* low level task data that entry.S needs immediate access to
* - this struct should fit entirely inside of one cache line
@@ -35,7 +56,6 @@ typedef struct {
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0=>preemptible, <0=>BUG */
- mm_segment_t addr_limit;
/*
* These stack pointers are overwritten on every system call or
* exception. SP is also saved to the stack it can be recovered when
@@ -55,7 +75,6 @@ struct thread_info {
{ \
.flags = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .addr_limit = KERNEL_DS, \
}
#endif /* !__ASSEMBLY__ */
@@ -76,6 +95,9 @@ struct thread_info {
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
#define TIF_SECCOMP 8 /* syscall secure computing */
+#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
+#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
+#define TIF_32BIT 11 /* compat-mode 32bit process */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -84,9 +106,12 @@ struct thread_info {
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_WORK_MASK \
- (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED)
+ (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+ _TIF_NOTIFY_SIGNAL | _TIF_UPROBE)
#define _TIF_SYSCALL_WORK \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index bad2a7c2cda5..d6a7428f6248 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -7,41 +7,67 @@
#define _ASM_RISCV_TIMEX_H
#include <asm/csr.h>
-#include <asm/mmio.h>
typedef unsigned long cycles_t;
-extern u64 __iomem *riscv_time_val;
-extern u64 __iomem *riscv_time_cmp;
+#ifdef CONFIG_RISCV_M_MODE
-#ifdef CONFIG_64BIT
-#define mmio_get_cycles() readq_relaxed(riscv_time_val)
-#else
-#define mmio_get_cycles() readl_relaxed(riscv_time_val)
-#define mmio_get_cycles_hi() readl_relaxed(((u32 *)riscv_time_val) + 1)
-#endif
+#include <asm/clint.h>
+#ifdef CONFIG_64BIT
static inline cycles_t get_cycles(void)
{
- if (IS_ENABLED(CONFIG_RISCV_SBI))
- return csr_read(CSR_TIME);
- return mmio_get_cycles();
+ return readq_relaxed(clint_time_val);
+}
+#else /* !CONFIG_64BIT */
+static inline u32 get_cycles(void)
+{
+ return readl_relaxed(((u32 *)clint_time_val));
}
#define get_cycles get_cycles
-#ifdef CONFIG_64BIT
-static inline u64 get_cycles64(void)
+static inline u32 get_cycles_hi(void)
{
+ return readl_relaxed(((u32 *)clint_time_val) + 1);
+}
+#define get_cycles_hi get_cycles_hi
+#endif /* CONFIG_64BIT */
+
+/*
+ * Much like MIPS, we may not have a viable counter to use at an early point
+ * in the boot process. Unfortunately we don't have a fallback, so instead
+ * we just return 0.
+ */
+static inline unsigned long random_get_entropy(void)
+{
+ if (unlikely(clint_time_val == NULL))
+ return random_get_entropy_fallback();
return get_cycles();
}
-#else /* CONFIG_64BIT */
+#define random_get_entropy() random_get_entropy()
+
+#else /* CONFIG_RISCV_M_MODE */
+
+static inline cycles_t get_cycles(void)
+{
+ return csr_read(CSR_TIME);
+}
+#define get_cycles get_cycles
+
static inline u32 get_cycles_hi(void)
{
- if (IS_ENABLED(CONFIG_RISCV_SBI))
- return csr_read(CSR_TIMEH);
- return mmio_get_cycles_hi();
+ return csr_read(CSR_TIMEH);
}
+#define get_cycles_hi get_cycles_hi
+
+#endif /* !CONFIG_RISCV_M_MODE */
+#ifdef CONFIG_64BIT
+static inline u64 get_cycles64(void)
+{
+ return get_cycles();
+}
+#else /* CONFIG_64BIT */
static inline u64 get_cycles64(void)
{
u32 hi, lo;
@@ -62,4 +88,6 @@ static inline int read_current_timer(unsigned long *timer_val)
return 0;
}
+extern void time_init(void);
+
#endif /* _ASM_RISCV_TIMEX_H */
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 394cfbccdcd9..801019381dea 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -9,6 +9,7 @@
#include <linux/mm_types.h>
#include <asm/smp.h>
+#include <asm/errata_list.h>
#ifdef CONFIG_MMU
static inline void local_flush_tlb_all(void)
@@ -19,7 +20,7 @@ static inline void local_flush_tlb_all(void)
/* Flush one page from local TLB */
static inline void local_flush_tlb_page(unsigned long addr)
{
- __asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
+ ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
}
#else /* CONFIG_MMU */
#define local_flush_tlb_all() do { } while (0)
@@ -32,6 +33,11 @@ void flush_tlb_mm(struct mm_struct *mm);
void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end);
+#endif
#else /* CONFIG_SMP && CONFIG_MMU */
#define flush_tlb_all() local_flush_tlb_all()
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 8ce9d607b53d..855450bed9f5 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -8,27 +8,12 @@
#ifndef _ASM_RISCV_UACCESS_H
#define _ASM_RISCV_UACCESS_H
+#include <asm/asm-extable.h>
+#include <asm/pgtable.h> /* for TASK_SIZE */
+
/*
* User space memory access functions
*/
-
-extern unsigned long __must_check __asm_copy_to_user(void __user *to,
- const void *from, unsigned long n);
-extern unsigned long __must_check __asm_copy_from_user(void *to,
- const void __user *from, unsigned long n);
-
-static inline unsigned long
-raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- return __asm_copy_from_user(to, from, n);
-}
-
-static inline unsigned long
-raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- return __asm_copy_to_user(to, from, n);
-}
-
#ifdef CONFIG_MMU
#include <linux/errno.h>
#include <linux/compiler.h>
@@ -36,6 +21,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
#include <asm/byteorder.h>
#include <asm/extable.h>
#include <asm/asm.h>
+#include <asm-generic/access_ok.h>
#define __enable_user_access() \
__asm__ __volatile__ ("csrs sstatus, %0" : : "r" (SR_SUM) : "memory")
@@ -43,63 +29,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
__asm__ __volatile__ ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory")
/*
- * The fs value determines whether argument validity checking should be
- * performed or not. If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-#define KERNEL_DS MAKE_MM_SEG(~0UL)
-#define USER_DS MAKE_MM_SEG(TASK_SIZE)
-
-#define get_fs() (current_thread_info()->addr_limit)
-
-static inline void set_fs(mm_segment_t fs)
-{
- current_thread_info()->addr_limit = fs;
-}
-
-#define segment_eq(a, b) ((a).seg == (b).seg)
-
-#define user_addr_max() (get_fs().seg)
-
-
-/**
- * access_ok: - Checks if a user space pointer is valid
- * @addr: User space pointer to start of block to check
- * @size: Size of block to check
- *
- * Context: User context only. This function may sleep.
- *
- * Checks if a pointer to a block of memory in user space is valid.
- *
- * Returns true (nonzero) if the memory block may be valid, false (zero)
- * if it is definitely invalid.
- *
- * Note that, depending on architecture, this function probably just
- * checks that the pointer is in the user space range - after calling
- * this function, memory access functions may still return -EFAULT.
- */
-#define access_ok(addr, size) ({ \
- __chk_user_ptr(addr); \
- likely(__access_ok((unsigned long __force)(addr), (size))); \
-})
-
-/*
- * Ensure that the range [addr, addr+size) is within the process's
- * address space
- */
-static inline int __access_ok(unsigned long addr, unsigned long size)
-{
- const mm_segment_t fs = get_fs();
-
- return size <= fs.seg && addr <= fs.seg - size;
-}
-
-/*
* The exception table consists of pairs of addresses: the first is the
* address of an instruction that is allowed to fault, and the second is
* the address at which the program should continue. No registers are
@@ -123,27 +52,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size)
#define __get_user_asm(insn, x, ptr, err) \
do { \
- uintptr_t __tmp; \
__typeof__(x) __x; \
- __enable_user_access(); \
__asm__ __volatile__ ( \
"1:\n" \
- " " insn " %1, %3\n" \
+ " " insn " %1, %2\n" \
"2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "3:\n" \
- " li %0, %4\n" \
- " li %1, 0\n" \
- " jump 2b, %2\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 3b\n" \
- " .previous" \
- : "+r" (err), "=&r" (__x), "=r" (__tmp) \
- : "m" (*(ptr)), "i" (-EFAULT)); \
- __disable_user_access(); \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \
+ : "+r" (err), "=&r" (__x) \
+ : "m" (*(ptr))); \
(x) = __x; \
} while (0)
@@ -155,37 +71,42 @@ do { \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u32 __lo, __hi; \
- uintptr_t __tmp; \
- __enable_user_access(); \
__asm__ __volatile__ ( \
"1:\n" \
- " lw %1, %4\n" \
+ " lw %1, %3\n" \
"2:\n" \
- " lw %2, %5\n" \
+ " lw %2, %4\n" \
"3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "4:\n" \
- " li %0, %6\n" \
- " li %1, 0\n" \
- " li %2, 0\n" \
- " jump 3b, %3\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 4b\n" \
- " " RISCV_PTR " 2b, 4b\n" \
- " .previous" \
- : "+r" (err), "=&r" (__lo), "=r" (__hi), \
- "=r" (__tmp) \
- : "m" (__ptr[__LSW]), "m" (__ptr[__MSW]), \
- "i" (-EFAULT)); \
- __disable_user_access(); \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 3b, %0, %1) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(2b, 3b, %0, %1) \
+ : "+r" (err), "=&r" (__lo), "=r" (__hi) \
+ : "m" (__ptr[__LSW]), "m" (__ptr[__MSW])); \
+ if (err) \
+ __hi = 0; \
(x) = (__typeof__(x))((__typeof__((x)-(x)))( \
(((u64)__hi << 32) | __lo))); \
} while (0)
#endif /* CONFIG_64BIT */
+#define __get_user_nocheck(x, __gu_ptr, __gu_err) \
+do { \
+ switch (sizeof(*__gu_ptr)) { \
+ case 1: \
+ __get_user_asm("lb", (x), __gu_ptr, __gu_err); \
+ break; \
+ case 2: \
+ __get_user_asm("lh", (x), __gu_ptr, __gu_err); \
+ break; \
+ case 4: \
+ __get_user_asm("lw", (x), __gu_ptr, __gu_err); \
+ break; \
+ case 8: \
+ __get_user_8((x), __gu_ptr, __gu_err); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+} while (0)
/**
* __get_user: - Get a simple variable from user space, with less checking.
@@ -209,25 +130,15 @@ do { \
*/
#define __get_user(x, ptr) \
({ \
- register long __gu_err = 0; \
const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \
+ long __gu_err = 0; \
+ \
__chk_user_ptr(__gu_ptr); \
- switch (sizeof(*__gu_ptr)) { \
- case 1: \
- __get_user_asm("lb", (x), __gu_ptr, __gu_err); \
- break; \
- case 2: \
- __get_user_asm("lh", (x), __gu_ptr, __gu_err); \
- break; \
- case 4: \
- __get_user_asm("lw", (x), __gu_ptr, __gu_err); \
- break; \
- case 8: \
- __get_user_8((x), __gu_ptr, __gu_err); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
+ \
+ __enable_user_access(); \
+ __get_user_nocheck(x, __gu_ptr, __gu_err); \
+ __disable_user_access(); \
+ \
__gu_err; \
})
@@ -259,26 +170,14 @@ do { \
#define __put_user_asm(insn, x, ptr, err) \
do { \
- uintptr_t __tmp; \
__typeof__(*(ptr)) __x = x; \
- __enable_user_access(); \
__asm__ __volatile__ ( \
"1:\n" \
- " " insn " %z3, %2\n" \
+ " " insn " %z2, %1\n" \
"2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "3:\n" \
- " li %0, %4\n" \
- " jump 2b, %1\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 3b\n" \
- " .previous" \
- : "+r" (err), "=r" (__tmp), "=m" (*(ptr)) \
- : "rJ" (__x), "i" (-EFAULT)); \
- __disable_user_access(); \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \
+ : "+r" (err), "=m" (*(ptr)) \
+ : "rJ" (__x)); \
} while (0)
#ifdef CONFIG_64BIT
@@ -289,33 +188,40 @@ do { \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u64 __x = (__typeof__((x)-(x)))(x); \
- uintptr_t __tmp; \
- __enable_user_access(); \
__asm__ __volatile__ ( \
"1:\n" \
- " sw %z4, %2\n" \
+ " sw %z3, %1\n" \
"2:\n" \
- " sw %z5, %3\n" \
+ " sw %z4, %2\n" \
"3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "4:\n" \
- " li %0, %6\n" \
- " jump 3b, %1\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 4b\n" \
- " " RISCV_PTR " 2b, 4b\n" \
- " .previous" \
- : "+r" (err), "=r" (__tmp), \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \
+ : "+r" (err), \
"=m" (__ptr[__LSW]), \
"=m" (__ptr[__MSW]) \
- : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \
- __disable_user_access(); \
+ : "rJ" (__x), "rJ" (__x >> 32)); \
} while (0)
#endif /* CONFIG_64BIT */
+#define __put_user_nocheck(x, __gu_ptr, __pu_err) \
+do { \
+ switch (sizeof(*__gu_ptr)) { \
+ case 1: \
+ __put_user_asm("sb", (x), __gu_ptr, __pu_err); \
+ break; \
+ case 2: \
+ __put_user_asm("sh", (x), __gu_ptr, __pu_err); \
+ break; \
+ case 4: \
+ __put_user_asm("sw", (x), __gu_ptr, __pu_err); \
+ break; \
+ case 8: \
+ __put_user_8((x), __gu_ptr, __pu_err); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+} while (0)
/**
* __put_user: - Write a simple value into user space, with less checking.
@@ -329,7 +235,9 @@ do { \
* data types like structures or arrays.
*
* @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
+ * to the result of dereferencing @ptr. The value of @x is copied to avoid
+ * re-ordering where @x is evaluated inside the block that enables user-space
+ * access (thus bypassing user space protection if @x is a function).
*
* Caller must check the pointer with access_ok() before calling this
* function.
@@ -338,25 +246,16 @@ do { \
*/
#define __put_user(x, ptr) \
({ \
- register long __pu_err = 0; \
__typeof__(*(ptr)) __user *__gu_ptr = (ptr); \
+ __typeof__(*__gu_ptr) __val = (x); \
+ long __pu_err = 0; \
+ \
__chk_user_ptr(__gu_ptr); \
- switch (sizeof(*__gu_ptr)) { \
- case 1: \
- __put_user_asm("sb", (x), __gu_ptr, __pu_err); \
- break; \
- case 2: \
- __put_user_asm("sh", (x), __gu_ptr, __pu_err); \
- break; \
- case 4: \
- __put_user_asm("sw", (x), __gu_ptr, __pu_err); \
- break; \
- case 8: \
- __put_user_8((x), __gu_ptr, __pu_err); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
+ \
+ __enable_user_access(); \
+ __put_user_nocheck(__val, __gu_ptr, __pu_err); \
+ __disable_user_access(); \
+ \
__pu_err; \
})
@@ -385,9 +284,26 @@ do { \
-EFAULT; \
})
+
+unsigned long __must_check __asm_copy_to_user(void __user *to,
+ const void *from, unsigned long n);
+unsigned long __must_check __asm_copy_from_user(void *to,
+ const void __user *from, unsigned long n);
+
+static inline unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ return __asm_copy_from_user(to, from, n);
+}
+
+static inline unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ return __asm_copy_to_user(to, from, n);
+}
+
extern long strncpy_from_user(char *dest, const char __user *src, long count);
-extern long __must_check strlen_user(const char __user *str);
extern long __must_check strnlen_user(const char __user *str, long n);
extern
@@ -401,80 +317,23 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
__clear_user(to, n) : n;
}
-/*
- * Atomic compare-and-exchange, but with a fixup for userspace faults. Faults
- * will set "err" to -EFAULT, while successful accesses return the previous
- * value.
- */
-#define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \
-({ \
- __typeof__(ptr) __ptr = (ptr); \
- __typeof__(*(ptr)) __old = (old); \
- __typeof__(*(ptr)) __new = (new); \
- __typeof__(*(ptr)) __ret; \
- __typeof__(err) __err = 0; \
- register unsigned int __rc; \
- __enable_user_access(); \
- switch (size) { \
- case 4: \
- __asm__ __volatile__ ( \
- "0:\n" \
- " lr.w" #scb " %[ret], %[ptr]\n" \
- " bne %[ret], %z[old], 1f\n" \
- " sc.w" #lrb " %[rc], %z[new], %[ptr]\n" \
- " bnez %[rc], 0b\n" \
- "1:\n" \
- ".section .fixup,\"ax\"\n" \
- ".balign 4\n" \
- "2:\n" \
- " li %[err], %[efault]\n" \
- " jump 1b, %[rc]\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- ".balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 2b\n" \
- ".previous\n" \
- : [ret] "=&r" (__ret), \
- [rc] "=&r" (__rc), \
- [ptr] "+A" (*__ptr), \
- [err] "=&r" (__err) \
- : [old] "rJ" (__old), \
- [new] "rJ" (__new), \
- [efault] "i" (-EFAULT)); \
- break; \
- case 8: \
- __asm__ __volatile__ ( \
- "0:\n" \
- " lr.d" #scb " %[ret], %[ptr]\n" \
- " bne %[ret], %z[old], 1f\n" \
- " sc.d" #lrb " %[rc], %z[new], %[ptr]\n" \
- " bnez %[rc], 0b\n" \
- "1:\n" \
- ".section .fixup,\"ax\"\n" \
- ".balign 4\n" \
- "2:\n" \
- " li %[err], %[efault]\n" \
- " jump 1b, %[rc]\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- ".balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 2b\n" \
- ".previous\n" \
- : [ret] "=&r" (__ret), \
- [rc] "=&r" (__rc), \
- [ptr] "+A" (*__ptr), \
- [err] "=&r" (__err) \
- : [old] "rJ" (__old), \
- [new] "rJ" (__new), \
- [efault] "i" (-EFAULT)); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
- __disable_user_access(); \
- (err) = __err; \
- __ret; \
-})
+#define __get_kernel_nofault(dst, src, type, err_label) \
+do { \
+ long __kr_err; \
+ \
+ __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
+ if (unlikely(__kr_err)) \
+ goto err_label; \
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, err_label) \
+do { \
+ long __kr_err; \
+ \
+ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
+ if (unlikely(__kr_err)) \
+ goto err_label; \
+} while (0)
#else /* CONFIG_MMU */
#include <asm-generic/uaccess.h>
diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index 977ee6181dab..221630bdbd07 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -10,6 +10,17 @@
#define __ARCH_WANT_SYS_CLONE
+#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_COMPAT_TRUNCATE64
+#define __ARCH_WANT_COMPAT_FTRUNCATE64
+#define __ARCH_WANT_COMPAT_FALLOCATE
+#define __ARCH_WANT_COMPAT_PREAD64
+#define __ARCH_WANT_COMPAT_PWRITE64
+#define __ARCH_WANT_COMPAT_SYNC_FILE_RANGE
+#define __ARCH_WANT_COMPAT_READAHEAD
+#define __ARCH_WANT_COMPAT_FADVISE64_64
+#endif
+
#include <uapi/asm/unistd.h>
#define NR_syscalls (__NR_syscalls)
diff --git a/arch/riscv/include/asm/uprobes.h b/arch/riscv/include/asm/uprobes.h
new file mode 100644
index 000000000000..f2183e00fdd2
--- /dev/null
+++ b/arch/riscv/include/asm/uprobes.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_RISCV_UPROBES_H
+#define _ASM_RISCV_UPROBES_H
+
+#include <asm/probes.h>
+#include <asm/patch.h>
+#include <asm/bug.h>
+
+#define MAX_UINSN_BYTES 8
+
+#ifdef CONFIG_RISCV_ISA_C
+#define UPROBE_SWBP_INSN __BUG_INSN_16
+#define UPROBE_SWBP_INSN_SIZE 2
+#else
+#define UPROBE_SWBP_INSN __BUG_INSN_32
+#define UPROBE_SWBP_INSN_SIZE 4
+#endif
+#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES
+
+typedef u32 uprobe_opcode_t;
+
+struct arch_uprobe_task {
+ unsigned long saved_cause;
+};
+
+struct arch_uprobe {
+ union {
+ u8 insn[MAX_UINSN_BYTES];
+ u8 ixol[MAX_UINSN_BYTES];
+ };
+ struct arch_probe_insn api;
+ unsigned long insn_size;
+ bool simulate;
+};
+
+bool uprobe_breakpoint_handler(struct pt_regs *regs);
+bool uprobe_single_step_handler(struct pt_regs *regs);
+
+#endif /* _ASM_RISCV_UPROBES_H */
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 8454f746bbfd..af981426fe0f 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -8,27 +8,30 @@
#ifndef _ASM_RISCV_VDSO_H
#define _ASM_RISCV_VDSO_H
-#include <linux/types.h>
-
-#ifndef GENERIC_TIME_VSYSCALL
-struct vdso_data {
-};
-#endif
-
/*
- * The VDSO symbols are mapped into Linux so we can just use regular symbol
- * addressing to get their offsets in userspace. The symbols are mapped at an
- * offset of 0, but since the linker must support setting weak undefined
- * symbols to the absolute address 0 it also happens to support other low
- * addresses even when the code model suggests those low addresses would not
- * otherwise be availiable.
+ * All systems with an MMU have a VDSO, but systems without an MMU don't
+ * support shared libraries and therefor don't have one.
*/
+#ifdef CONFIG_MMU
+
+#define __VVAR_PAGES 2
+
+#ifndef __ASSEMBLY__
+#include <generated/vdso-offsets.h>
+
#define VDSO_SYMBOL(base, name) \
-({ \
- extern const char __vdso_##name[]; \
- (void __user *)((unsigned long)(base) + __vdso_##name); \
-})
+ (void __user *)((unsigned long)(base) + __vdso_##name##_offset)
+
+#ifdef CONFIG_COMPAT
+#include <generated/compat_vdso-offsets.h>
+
+#define COMPAT_VDSO_SYMBOL(base, name) \
+ (void __user *)((unsigned long)(base) + compat__vdso_##name##_offset)
+
+#endif /* CONFIG_COMPAT */
+
+#endif /* !__ASSEMBLY__ */
-asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
+#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_VDSO_H */
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
index c8e818688ec1..77d9c2f721c4 100644
--- a/arch/riscv/include/asm/vdso/gettimeofday.h
+++ b/arch/riscv/include/asm/vdso/gettimeofday.h
@@ -4,6 +4,7 @@
#ifndef __ASSEMBLY__
+#include <asm/barrier.h>
#include <asm/unistd.h>
#include <asm/csr.h>
#include <uapi/linux/time.h>
@@ -59,7 +60,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
}
-static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_data *vd)
{
/*
* The purpose of csr_read(CSR_TIME) is to trap the system into
@@ -74,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
return _vdso_data;
}
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+ return _timens_data;
+}
+#endif
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h
index 82a5693b1861..134388cbaaa1 100644
--- a/arch/riscv/include/asm/vdso/processor.h
+++ b/arch/riscv/include/asm/vdso/processor.h
@@ -4,6 +4,8 @@
#ifndef __ASSEMBLY__
+#include <asm/barrier.h>
+
static inline void cpu_relax(void)
{
#ifdef __riscv_muldiv
diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
new file mode 100644
index 000000000000..cb89af3f0704
--- /dev/null
+++ b/arch/riscv/include/asm/vendorid_list.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#ifndef ASM_VENDOR_LIST_H
+#define ASM_VENDOR_LIST_H
+
+#define SIFIVE_VENDOR_ID 0x489
+#define THEAD_VENDOR_ID 0x5b7
+
+#endif
diff --git a/arch/riscv/include/asm/xip_fixup.h b/arch/riscv/include/asm/xip_fixup.h
new file mode 100644
index 000000000000..d4ffc3c37649
--- /dev/null
+++ b/arch/riscv/include/asm/xip_fixup.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * XIP fixup macros, only useful in assembly.
+ */
+#ifndef _ASM_RISCV_XIP_FIXUP_H
+#define _ASM_RISCV_XIP_FIXUP_H
+
+#include <linux/pgtable.h>
+
+#ifdef CONFIG_XIP_KERNEL
+.macro XIP_FIXUP_OFFSET reg
+ REG_L t0, _xip_fixup
+ add \reg, \reg, t0
+.endm
+.macro XIP_FIXUP_FLASH_OFFSET reg
+ la t1, __data_loc
+ REG_L t1, _xip_phys_offset
+ sub \reg, \reg, t1
+ add \reg, \reg, t0
+.endm
+
+_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET
+_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET
+#else
+.macro XIP_FIXUP_OFFSET reg
+.endm
+.macro XIP_FIXUP_FLASH_OFFSET reg
+.endm
+#endif /* CONFIG_XIP_KERNEL */
+
+#endif
diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
index d86cb17bbabe..32c73ba1d531 100644
--- a/arch/riscv/include/uapi/asm/auxvec.h
+++ b/arch/riscv/include/uapi/asm/auxvec.h
@@ -10,4 +10,28 @@
/* vDSO location */
#define AT_SYSINFO_EHDR 33
+/*
+ * The set of entries below represent more extensive information
+ * about the caches, in the form of two entry per cache type,
+ * one entry containing the cache size in bytes, and the other
+ * containing the cache line size in bytes in the bottom 16 bits
+ * and the cache associativity in the next 16 bits.
+ *
+ * The associativity is such that if N is the 16-bit value, the
+ * cache is N way set associative. A value if 0xffff means fully
+ * associative, a value of 1 means directly mapped.
+ *
+ * For all these fields, a value of 0 means that the information
+ * is not known.
+ */
+#define AT_L1I_CACHESIZE 40
+#define AT_L1I_CACHEGEOMETRY 41
+#define AT_L1D_CACHESIZE 42
+#define AT_L1D_CACHEGEOMETRY 43
+#define AT_L2_CACHESIZE 44
+#define AT_L2_CACHEGEOMETRY 45
+
+/* entries in ARCH_DLINFO */
+#define AT_VECTOR_SIZE_ARCH 7
+
#endif /* _UAPI_ASM_RISCV_AUXVEC_H */
diff --git a/arch/riscv/include/uapi/asm/hwcap.h b/arch/riscv/include/uapi/asm/hwcap.h
index dee98ee28318..46dc3f5ee99f 100644
--- a/arch/riscv/include/uapi/asm/hwcap.h
+++ b/arch/riscv/include/uapi/asm/hwcap.h
@@ -11,7 +11,7 @@
/*
* Linux saves the floating-point registers according to the ISA Linux is
* executing on, as opposed to the ISA the user program is compiled for. This
- * is necessary for a handful of esoteric use cases: for example, userpsace
+ * is necessary for a handful of esoteric use cases: for example, userspace
* threading libraries must be able to examine the actual machine state in
* order to fully reconstruct the state of a thread.
*/
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
new file mode 100644
index 000000000000..6119368ba6d5
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#ifndef __LINUX_KVM_RISCV_H
+#define __LINUX_KVM_RISCV_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_READONLY_MEM
+
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#define KVM_INTERRUPT_SET -1U
+#define KVM_INTERRUPT_UNSET -2U
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+};
+
+/* KVM Debug exit structure */
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+};
+
+/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_config {
+ unsigned long isa;
+};
+
+/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_core {
+ struct user_regs_struct regs;
+ unsigned long mode;
+};
+
+/* Possible privilege modes for kvm_riscv_core */
+#define KVM_RISCV_MODE_S 1
+#define KVM_RISCV_MODE_U 0
+
+/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_csr {
+ unsigned long sstatus;
+ unsigned long sie;
+ unsigned long stvec;
+ unsigned long sscratch;
+ unsigned long sepc;
+ unsigned long scause;
+ unsigned long stval;
+ unsigned long sip;
+ unsigned long satp;
+ unsigned long scounteren;
+};
+
+/* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_timer {
+ __u64 frequency;
+ __u64 time;
+ __u64 compare;
+ __u64 state;
+};
+
+/*
+ * ISA extension IDs specific to KVM. This is not the same as the host ISA
+ * extension IDs as that is internal to the host and should not be exposed
+ * to the guest. This should always be contiguous to keep the mapping simple
+ * in KVM implementation.
+ */
+enum KVM_RISCV_ISA_EXT_ID {
+ KVM_RISCV_ISA_EXT_A = 0,
+ KVM_RISCV_ISA_EXT_C,
+ KVM_RISCV_ISA_EXT_D,
+ KVM_RISCV_ISA_EXT_F,
+ KVM_RISCV_ISA_EXT_H,
+ KVM_RISCV_ISA_EXT_I,
+ KVM_RISCV_ISA_EXT_M,
+ KVM_RISCV_ISA_EXT_MAX,
+};
+
+/* Possible states for kvm_riscv_timer */
+#define KVM_RISCV_TIMER_STATE_OFF 0
+#define KVM_RISCV_TIMER_STATE_ON 1
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000
+#define KVM_REG_RISCV_TYPE_SHIFT 24
+
+/* Config registers are mapped as type 1 */
+#define KVM_REG_RISCV_CONFIG (0x01 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CONFIG_REG(name) \
+ (offsetof(struct kvm_riscv_config, name) / sizeof(unsigned long))
+
+/* Core registers are mapped as type 2 */
+#define KVM_REG_RISCV_CORE (0x02 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CORE_REG(name) \
+ (offsetof(struct kvm_riscv_core, name) / sizeof(unsigned long))
+
+/* Control and status registers are mapped as type 3 */
+#define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CSR_REG(name) \
+ (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
+
+/* Timer registers are mapped as type 4 */
+#define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_TIMER_REG(name) \
+ (offsetof(struct kvm_riscv_timer, name) / sizeof(__u64))
+
+/* F extension registers are mapped as type 5 */
+#define KVM_REG_RISCV_FP_F (0x05 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_F_REG(name) \
+ (offsetof(struct __riscv_f_ext_state, name) / sizeof(__u32))
+
+/* D extension registers are mapped as type 6 */
+#define KVM_REG_RISCV_FP_D (0x06 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_D_REG(name) \
+ (offsetof(struct __riscv_d_ext_state, name) / sizeof(__u64))
+
+/* ISA Extension registers are mapped as type 7 */
+#define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT)
+
+#endif
+
+#endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h
index 13ce76cc5aff..73d7cdd2ec49 100644
--- a/arch/riscv/include/uapi/asm/unistd.h
+++ b/arch/riscv/include/uapi/asm/unistd.h
@@ -12,15 +12,17 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-#ifdef __LP64__
+#if defined(__LP64__) && !defined(__SYSCALL_COMPAT)
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SET_GET_RLIMIT
-#define __ARCH_WANT_SYS_CLONE3
#endif /* __LP64__ */
+#define __ARCH_WANT_SYS_CLONE3
+#define __ARCH_WANT_MEMFD_SECRET
+
#include <asm-generic/unistd.h>
/*
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index b355cf485671..c71d6591d539 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -4,14 +4,35 @@
#
ifdef CONFIG_FTRACE
-CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_patch.o = -pg
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE)
+endif
+CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
+
+ifdef CONFIG_KEXEC
+AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax)
+endif
+
+# cmodel=medany and notrace when patching early
+ifdef CONFIG_RISCV_ALTERNATIVE_EARLY
+CFLAGS_alternative.o := -mcmodel=medany
+CFLAGS_cpufeature.o := -mcmodel=medany
+ifdef CONFIG_FTRACE
+CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE)
+endif
+ifdef CONFIG_KASAN
+KASAN_SANITIZE_alternative.o := n
+KASAN_SANITIZE_cpufeature.o := n
+endif
endif
extra-y += head.o
extra-y += vmlinux.lds
obj-y += soc.o
+obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o
obj-y += cpu.o
obj-y += cpufeature.o
obj-y += entry.o
@@ -29,21 +50,26 @@ obj-y += riscv_ksyms.o
obj-y += stacktrace.o
obj-y += cacheinfo.o
obj-y += patch.o
+obj-y += probes/
obj-$(CONFIG_MMU) += vdso.o vdso/
-obj-$(CONFIG_RISCV_M_MODE) += clint.o traps_misaligned.o
+obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
-obj-$(CONFIG_SMP) += cpu_ops_spinwait.o
+
+obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
+obj-$(CONFIG_CPU_PM) += suspend_entry.o suspend.o
+
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
-obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o
+obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
+
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
obj-$(CONFIG_RISCV_SBI) += sbi.o
@@ -52,5 +78,13 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o
endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_KGDB) += kgdb.o
+obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
-clean:
+obj-$(CONFIG_EFI) += efi.o
+obj-$(CONFIG_COMPAT) += compat_syscall_table.o
+obj-$(CONFIG_COMPAT) += compat_signal.o
+obj-$(CONFIG_COMPAT) += compat_vdso/
diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
new file mode 100644
index 000000000000..c9d0d3c53223
--- /dev/null
+++ b/arch/riscv/kernel/alternative.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * alternative runtime patching
+ * inspired by the ARM64 and x86 version
+ *
+ * Copyright (C) 2021 Sifive.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/uaccess.h>
+#include <asm/alternative.h>
+#include <asm/sections.h>
+#include <asm/vendorid_list.h>
+#include <asm/sbi.h>
+#include <asm/csr.h>
+
+struct cpu_manufacturer_info_t {
+ unsigned long vendor_id;
+ unsigned long arch_id;
+ unsigned long imp_id;
+ void (*vendor_patch_func)(struct alt_entry *begin, struct alt_entry *end,
+ unsigned long archid, unsigned long impid,
+ unsigned int stage);
+};
+
+static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
+{
+#ifdef CONFIG_RISCV_M_MODE
+ cpu_mfr_info->vendor_id = csr_read(CSR_MVENDORID);
+ cpu_mfr_info->arch_id = csr_read(CSR_MARCHID);
+ cpu_mfr_info->imp_id = csr_read(CSR_MIMPID);
+#else
+ cpu_mfr_info->vendor_id = sbi_get_mvendorid();
+ cpu_mfr_info->arch_id = sbi_get_marchid();
+ cpu_mfr_info->imp_id = sbi_get_mimpid();
+#endif
+
+ switch (cpu_mfr_info->vendor_id) {
+#ifdef CONFIG_ERRATA_SIFIVE
+ case SIFIVE_VENDOR_ID:
+ cpu_mfr_info->vendor_patch_func = sifive_errata_patch_func;
+ break;
+#endif
+#ifdef CONFIG_ERRATA_THEAD
+ case THEAD_VENDOR_ID:
+ cpu_mfr_info->vendor_patch_func = thead_errata_patch_func;
+ break;
+#endif
+ default:
+ cpu_mfr_info->vendor_patch_func = NULL;
+ }
+}
+
+/*
+ * This is called very early in the boot process (directly after we run
+ * a feature detect on the boot CPU). No need to worry about other CPUs
+ * here.
+ */
+static void __init_or_module _apply_alternatives(struct alt_entry *begin,
+ struct alt_entry *end,
+ unsigned int stage)
+{
+ struct cpu_manufacturer_info_t cpu_mfr_info;
+
+ riscv_fill_cpu_mfr_info(&cpu_mfr_info);
+
+ riscv_cpufeature_patch_func(begin, end, stage);
+
+ if (!cpu_mfr_info.vendor_patch_func)
+ return;
+
+ cpu_mfr_info.vendor_patch_func(begin, end,
+ cpu_mfr_info.arch_id,
+ cpu_mfr_info.imp_id,
+ stage);
+}
+
+void __init apply_boot_alternatives(void)
+{
+ /* If called on non-boot cpu things could go wrong */
+ WARN_ON(smp_processor_id() != 0);
+
+ _apply_alternatives((struct alt_entry *)__alt_start,
+ (struct alt_entry *)__alt_end,
+ RISCV_ALTERNATIVES_BOOT);
+}
+
+/*
+ * apply_early_boot_alternatives() is called from setup_vm() with MMU-off.
+ *
+ * Following requirements should be honoured for it to work correctly:
+ * 1) It should use PC-relative addressing for accessing kernel symbols.
+ * To achieve this we always use GCC cmodel=medany.
+ * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
+ * so disable compiler instrumentation when FTRACE is enabled.
+ *
+ * Currently, the above requirements are honoured by using custom CFLAGS
+ * for alternative.o in kernel/Makefile.
+ */
+void __init apply_early_boot_alternatives(void)
+{
+#ifdef CONFIG_RISCV_ALTERNATIVE_EARLY
+ _apply_alternatives((struct alt_entry *)__alt_start,
+ (struct alt_entry *)__alt_end,
+ RISCV_ALTERNATIVES_EARLY_BOOT);
+#endif
+}
+
+#ifdef CONFIG_MODULES
+void apply_module_alternatives(void *start, size_t length)
+{
+ _apply_alternatives((struct alt_entry *)start,
+ (struct alt_entry *)(start + length),
+ RISCV_ALTERNATIVES_MODULE);
+}
+#endif
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 07cb9c10de4e..df9444397908 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -7,9 +7,15 @@
#define GENERATING_ASM_OFFSETS
#include <linux/kbuild.h>
+#include <linux/mm.h>
#include <linux/sched.h>
+#include <asm/kvm_host.h>
#include <asm/thread_info.h>
#include <asm/ptrace.h>
+#include <asm/cpu_ops_sbi.h>
+#include <asm/suspend.h>
+
+void asm_offsets(void);
void asm_offsets(void)
{
@@ -27,14 +33,10 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
- OFFSET(TASK_THREAD_SP, task_struct, thread.sp);
- OFFSET(TASK_STACK, task_struct, stack);
- OFFSET(TASK_TI, task_struct, thread_info);
OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags);
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
- OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]);
@@ -69,6 +71,9 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_F30, task_struct, thread.fstate.f[30]);
OFFSET(TASK_THREAD_F31, task_struct, thread.fstate.f[31]);
OFFSET(TASK_THREAD_FCSR, task_struct, thread.fstate.fcsr);
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(TSK_STACK_CANARY, task_struct, stack_canary);
+#endif
DEFINE(PT_SIZE, sizeof(struct pt_regs));
OFFSET(PT_EPC, pt_regs, epc);
@@ -109,6 +114,162 @@ void asm_offsets(void)
OFFSET(PT_BADADDR, pt_regs, badaddr);
OFFSET(PT_CAUSE, pt_regs, cause);
+ OFFSET(SUSPEND_CONTEXT_REGS, suspend_context, regs);
+
+ OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero);
+ OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra);
+ OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp);
+ OFFSET(KVM_ARCH_GUEST_GP, kvm_vcpu_arch, guest_context.gp);
+ OFFSET(KVM_ARCH_GUEST_TP, kvm_vcpu_arch, guest_context.tp);
+ OFFSET(KVM_ARCH_GUEST_T0, kvm_vcpu_arch, guest_context.t0);
+ OFFSET(KVM_ARCH_GUEST_T1, kvm_vcpu_arch, guest_context.t1);
+ OFFSET(KVM_ARCH_GUEST_T2, kvm_vcpu_arch, guest_context.t2);
+ OFFSET(KVM_ARCH_GUEST_S0, kvm_vcpu_arch, guest_context.s0);
+ OFFSET(KVM_ARCH_GUEST_S1, kvm_vcpu_arch, guest_context.s1);
+ OFFSET(KVM_ARCH_GUEST_A0, kvm_vcpu_arch, guest_context.a0);
+ OFFSET(KVM_ARCH_GUEST_A1, kvm_vcpu_arch, guest_context.a1);
+ OFFSET(KVM_ARCH_GUEST_A2, kvm_vcpu_arch, guest_context.a2);
+ OFFSET(KVM_ARCH_GUEST_A3, kvm_vcpu_arch, guest_context.a3);
+ OFFSET(KVM_ARCH_GUEST_A4, kvm_vcpu_arch, guest_context.a4);
+ OFFSET(KVM_ARCH_GUEST_A5, kvm_vcpu_arch, guest_context.a5);
+ OFFSET(KVM_ARCH_GUEST_A6, kvm_vcpu_arch, guest_context.a6);
+ OFFSET(KVM_ARCH_GUEST_A7, kvm_vcpu_arch, guest_context.a7);
+ OFFSET(KVM_ARCH_GUEST_S2, kvm_vcpu_arch, guest_context.s2);
+ OFFSET(KVM_ARCH_GUEST_S3, kvm_vcpu_arch, guest_context.s3);
+ OFFSET(KVM_ARCH_GUEST_S4, kvm_vcpu_arch, guest_context.s4);
+ OFFSET(KVM_ARCH_GUEST_S5, kvm_vcpu_arch, guest_context.s5);
+ OFFSET(KVM_ARCH_GUEST_S6, kvm_vcpu_arch, guest_context.s6);
+ OFFSET(KVM_ARCH_GUEST_S7, kvm_vcpu_arch, guest_context.s7);
+ OFFSET(KVM_ARCH_GUEST_S8, kvm_vcpu_arch, guest_context.s8);
+ OFFSET(KVM_ARCH_GUEST_S9, kvm_vcpu_arch, guest_context.s9);
+ OFFSET(KVM_ARCH_GUEST_S10, kvm_vcpu_arch, guest_context.s10);
+ OFFSET(KVM_ARCH_GUEST_S11, kvm_vcpu_arch, guest_context.s11);
+ OFFSET(KVM_ARCH_GUEST_T3, kvm_vcpu_arch, guest_context.t3);
+ OFFSET(KVM_ARCH_GUEST_T4, kvm_vcpu_arch, guest_context.t4);
+ OFFSET(KVM_ARCH_GUEST_T5, kvm_vcpu_arch, guest_context.t5);
+ OFFSET(KVM_ARCH_GUEST_T6, kvm_vcpu_arch, guest_context.t6);
+ OFFSET(KVM_ARCH_GUEST_SEPC, kvm_vcpu_arch, guest_context.sepc);
+ OFFSET(KVM_ARCH_GUEST_SSTATUS, kvm_vcpu_arch, guest_context.sstatus);
+ OFFSET(KVM_ARCH_GUEST_HSTATUS, kvm_vcpu_arch, guest_context.hstatus);
+ OFFSET(KVM_ARCH_GUEST_SCOUNTEREN, kvm_vcpu_arch, guest_csr.scounteren);
+
+ OFFSET(KVM_ARCH_HOST_ZERO, kvm_vcpu_arch, host_context.zero);
+ OFFSET(KVM_ARCH_HOST_RA, kvm_vcpu_arch, host_context.ra);
+ OFFSET(KVM_ARCH_HOST_SP, kvm_vcpu_arch, host_context.sp);
+ OFFSET(KVM_ARCH_HOST_GP, kvm_vcpu_arch, host_context.gp);
+ OFFSET(KVM_ARCH_HOST_TP, kvm_vcpu_arch, host_context.tp);
+ OFFSET(KVM_ARCH_HOST_T0, kvm_vcpu_arch, host_context.t0);
+ OFFSET(KVM_ARCH_HOST_T1, kvm_vcpu_arch, host_context.t1);
+ OFFSET(KVM_ARCH_HOST_T2, kvm_vcpu_arch, host_context.t2);
+ OFFSET(KVM_ARCH_HOST_S0, kvm_vcpu_arch, host_context.s0);
+ OFFSET(KVM_ARCH_HOST_S1, kvm_vcpu_arch, host_context.s1);
+ OFFSET(KVM_ARCH_HOST_A0, kvm_vcpu_arch, host_context.a0);
+ OFFSET(KVM_ARCH_HOST_A1, kvm_vcpu_arch, host_context.a1);
+ OFFSET(KVM_ARCH_HOST_A2, kvm_vcpu_arch, host_context.a2);
+ OFFSET(KVM_ARCH_HOST_A3, kvm_vcpu_arch, host_context.a3);
+ OFFSET(KVM_ARCH_HOST_A4, kvm_vcpu_arch, host_context.a4);
+ OFFSET(KVM_ARCH_HOST_A5, kvm_vcpu_arch, host_context.a5);
+ OFFSET(KVM_ARCH_HOST_A6, kvm_vcpu_arch, host_context.a6);
+ OFFSET(KVM_ARCH_HOST_A7, kvm_vcpu_arch, host_context.a7);
+ OFFSET(KVM_ARCH_HOST_S2, kvm_vcpu_arch, host_context.s2);
+ OFFSET(KVM_ARCH_HOST_S3, kvm_vcpu_arch, host_context.s3);
+ OFFSET(KVM_ARCH_HOST_S4, kvm_vcpu_arch, host_context.s4);
+ OFFSET(KVM_ARCH_HOST_S5, kvm_vcpu_arch, host_context.s5);
+ OFFSET(KVM_ARCH_HOST_S6, kvm_vcpu_arch, host_context.s6);
+ OFFSET(KVM_ARCH_HOST_S7, kvm_vcpu_arch, host_context.s7);
+ OFFSET(KVM_ARCH_HOST_S8, kvm_vcpu_arch, host_context.s8);
+ OFFSET(KVM_ARCH_HOST_S9, kvm_vcpu_arch, host_context.s9);
+ OFFSET(KVM_ARCH_HOST_S10, kvm_vcpu_arch, host_context.s10);
+ OFFSET(KVM_ARCH_HOST_S11, kvm_vcpu_arch, host_context.s11);
+ OFFSET(KVM_ARCH_HOST_T3, kvm_vcpu_arch, host_context.t3);
+ OFFSET(KVM_ARCH_HOST_T4, kvm_vcpu_arch, host_context.t4);
+ OFFSET(KVM_ARCH_HOST_T5, kvm_vcpu_arch, host_context.t5);
+ OFFSET(KVM_ARCH_HOST_T6, kvm_vcpu_arch, host_context.t6);
+ OFFSET(KVM_ARCH_HOST_SEPC, kvm_vcpu_arch, host_context.sepc);
+ OFFSET(KVM_ARCH_HOST_SSTATUS, kvm_vcpu_arch, host_context.sstatus);
+ OFFSET(KVM_ARCH_HOST_HSTATUS, kvm_vcpu_arch, host_context.hstatus);
+ OFFSET(KVM_ARCH_HOST_SSCRATCH, kvm_vcpu_arch, host_sscratch);
+ OFFSET(KVM_ARCH_HOST_STVEC, kvm_vcpu_arch, host_stvec);
+ OFFSET(KVM_ARCH_HOST_SCOUNTEREN, kvm_vcpu_arch, host_scounteren);
+
+ OFFSET(KVM_ARCH_TRAP_SEPC, kvm_cpu_trap, sepc);
+ OFFSET(KVM_ARCH_TRAP_SCAUSE, kvm_cpu_trap, scause);
+ OFFSET(KVM_ARCH_TRAP_STVAL, kvm_cpu_trap, stval);
+ OFFSET(KVM_ARCH_TRAP_HTVAL, kvm_cpu_trap, htval);
+ OFFSET(KVM_ARCH_TRAP_HTINST, kvm_cpu_trap, htinst);
+
+ /* F extension */
+
+ OFFSET(KVM_ARCH_FP_F_F0, kvm_cpu_context, fp.f.f[0]);
+ OFFSET(KVM_ARCH_FP_F_F1, kvm_cpu_context, fp.f.f[1]);
+ OFFSET(KVM_ARCH_FP_F_F2, kvm_cpu_context, fp.f.f[2]);
+ OFFSET(KVM_ARCH_FP_F_F3, kvm_cpu_context, fp.f.f[3]);
+ OFFSET(KVM_ARCH_FP_F_F4, kvm_cpu_context, fp.f.f[4]);
+ OFFSET(KVM_ARCH_FP_F_F5, kvm_cpu_context, fp.f.f[5]);
+ OFFSET(KVM_ARCH_FP_F_F6, kvm_cpu_context, fp.f.f[6]);
+ OFFSET(KVM_ARCH_FP_F_F7, kvm_cpu_context, fp.f.f[7]);
+ OFFSET(KVM_ARCH_FP_F_F8, kvm_cpu_context, fp.f.f[8]);
+ OFFSET(KVM_ARCH_FP_F_F9, kvm_cpu_context, fp.f.f[9]);
+ OFFSET(KVM_ARCH_FP_F_F10, kvm_cpu_context, fp.f.f[10]);
+ OFFSET(KVM_ARCH_FP_F_F11, kvm_cpu_context, fp.f.f[11]);
+ OFFSET(KVM_ARCH_FP_F_F12, kvm_cpu_context, fp.f.f[12]);
+ OFFSET(KVM_ARCH_FP_F_F13, kvm_cpu_context, fp.f.f[13]);
+ OFFSET(KVM_ARCH_FP_F_F14, kvm_cpu_context, fp.f.f[14]);
+ OFFSET(KVM_ARCH_FP_F_F15, kvm_cpu_context, fp.f.f[15]);
+ OFFSET(KVM_ARCH_FP_F_F16, kvm_cpu_context, fp.f.f[16]);
+ OFFSET(KVM_ARCH_FP_F_F17, kvm_cpu_context, fp.f.f[17]);
+ OFFSET(KVM_ARCH_FP_F_F18, kvm_cpu_context, fp.f.f[18]);
+ OFFSET(KVM_ARCH_FP_F_F19, kvm_cpu_context, fp.f.f[19]);
+ OFFSET(KVM_ARCH_FP_F_F20, kvm_cpu_context, fp.f.f[20]);
+ OFFSET(KVM_ARCH_FP_F_F21, kvm_cpu_context, fp.f.f[21]);
+ OFFSET(KVM_ARCH_FP_F_F22, kvm_cpu_context, fp.f.f[22]);
+ OFFSET(KVM_ARCH_FP_F_F23, kvm_cpu_context, fp.f.f[23]);
+ OFFSET(KVM_ARCH_FP_F_F24, kvm_cpu_context, fp.f.f[24]);
+ OFFSET(KVM_ARCH_FP_F_F25, kvm_cpu_context, fp.f.f[25]);
+ OFFSET(KVM_ARCH_FP_F_F26, kvm_cpu_context, fp.f.f[26]);
+ OFFSET(KVM_ARCH_FP_F_F27, kvm_cpu_context, fp.f.f[27]);
+ OFFSET(KVM_ARCH_FP_F_F28, kvm_cpu_context, fp.f.f[28]);
+ OFFSET(KVM_ARCH_FP_F_F29, kvm_cpu_context, fp.f.f[29]);
+ OFFSET(KVM_ARCH_FP_F_F30, kvm_cpu_context, fp.f.f[30]);
+ OFFSET(KVM_ARCH_FP_F_F31, kvm_cpu_context, fp.f.f[31]);
+ OFFSET(KVM_ARCH_FP_F_FCSR, kvm_cpu_context, fp.f.fcsr);
+
+ /* D extension */
+
+ OFFSET(KVM_ARCH_FP_D_F0, kvm_cpu_context, fp.d.f[0]);
+ OFFSET(KVM_ARCH_FP_D_F1, kvm_cpu_context, fp.d.f[1]);
+ OFFSET(KVM_ARCH_FP_D_F2, kvm_cpu_context, fp.d.f[2]);
+ OFFSET(KVM_ARCH_FP_D_F3, kvm_cpu_context, fp.d.f[3]);
+ OFFSET(KVM_ARCH_FP_D_F4, kvm_cpu_context, fp.d.f[4]);
+ OFFSET(KVM_ARCH_FP_D_F5, kvm_cpu_context, fp.d.f[5]);
+ OFFSET(KVM_ARCH_FP_D_F6, kvm_cpu_context, fp.d.f[6]);
+ OFFSET(KVM_ARCH_FP_D_F7, kvm_cpu_context, fp.d.f[7]);
+ OFFSET(KVM_ARCH_FP_D_F8, kvm_cpu_context, fp.d.f[8]);
+ OFFSET(KVM_ARCH_FP_D_F9, kvm_cpu_context, fp.d.f[9]);
+ OFFSET(KVM_ARCH_FP_D_F10, kvm_cpu_context, fp.d.f[10]);
+ OFFSET(KVM_ARCH_FP_D_F11, kvm_cpu_context, fp.d.f[11]);
+ OFFSET(KVM_ARCH_FP_D_F12, kvm_cpu_context, fp.d.f[12]);
+ OFFSET(KVM_ARCH_FP_D_F13, kvm_cpu_context, fp.d.f[13]);
+ OFFSET(KVM_ARCH_FP_D_F14, kvm_cpu_context, fp.d.f[14]);
+ OFFSET(KVM_ARCH_FP_D_F15, kvm_cpu_context, fp.d.f[15]);
+ OFFSET(KVM_ARCH_FP_D_F16, kvm_cpu_context, fp.d.f[16]);
+ OFFSET(KVM_ARCH_FP_D_F17, kvm_cpu_context, fp.d.f[17]);
+ OFFSET(KVM_ARCH_FP_D_F18, kvm_cpu_context, fp.d.f[18]);
+ OFFSET(KVM_ARCH_FP_D_F19, kvm_cpu_context, fp.d.f[19]);
+ OFFSET(KVM_ARCH_FP_D_F20, kvm_cpu_context, fp.d.f[20]);
+ OFFSET(KVM_ARCH_FP_D_F21, kvm_cpu_context, fp.d.f[21]);
+ OFFSET(KVM_ARCH_FP_D_F22, kvm_cpu_context, fp.d.f[22]);
+ OFFSET(KVM_ARCH_FP_D_F23, kvm_cpu_context, fp.d.f[23]);
+ OFFSET(KVM_ARCH_FP_D_F24, kvm_cpu_context, fp.d.f[24]);
+ OFFSET(KVM_ARCH_FP_D_F25, kvm_cpu_context, fp.d.f[25]);
+ OFFSET(KVM_ARCH_FP_D_F26, kvm_cpu_context, fp.d.f[26]);
+ OFFSET(KVM_ARCH_FP_D_F27, kvm_cpu_context, fp.d.f[27]);
+ OFFSET(KVM_ARCH_FP_D_F28, kvm_cpu_context, fp.d.f[28]);
+ OFFSET(KVM_ARCH_FP_D_F29, kvm_cpu_context, fp.d.f[29]);
+ OFFSET(KVM_ARCH_FP_D_F30, kvm_cpu_context, fp.d.f[30]);
+ OFFSET(KVM_ARCH_FP_D_F31, kvm_cpu_context, fp.d.f[31]);
+ OFFSET(KVM_ARCH_FP_D_FCSR, kvm_cpu_context, fp.d.fcsr);
+
/*
* THREAD_{F,X}* might be larger than a S-type offset can handle, but
* these are used in performance-sensitive assembly so we can't resort
@@ -309,4 +470,8 @@ void asm_offsets(void)
* ensures the alignment is sane.
*/
DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN));
+
+ OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr);
+ OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr);
+ OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
}
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index bd0f122965c3..90deabfe63ea 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -3,7 +3,6 @@
* Copyright (C) 2017 SiFive
*/
-#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -25,15 +24,96 @@ cache_get_priv_group(struct cacheinfo *this_leaf)
return NULL;
}
-static void ci_leaf_init(struct cacheinfo *this_leaf,
- struct device_node *node,
- enum cache_type type, unsigned int level)
+static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type)
+{
+ /*
+ * Using raw_smp_processor_id() elides a preemptability check, but this
+ * is really indicative of a larger problem: the cacheinfo UABI assumes
+ * that cores have a homonogenous view of the cache hierarchy. That
+ * happens to be the case for the current set of RISC-V systems, but
+ * likely won't be true in general. Since there's no way to provide
+ * correct information for these systems via the current UABI we're
+ * just eliding the check for now.
+ */
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(raw_smp_processor_id());
+ struct cacheinfo *this_leaf;
+ int index;
+
+ for (index = 0; index < this_cpu_ci->num_leaves; index++) {
+ this_leaf = this_cpu_ci->info_list + index;
+ if (this_leaf->level == level && this_leaf->type == type)
+ return this_leaf;
+ }
+
+ return NULL;
+}
+
+uintptr_t get_cache_size(u32 level, enum cache_type type)
+{
+ struct cacheinfo *this_leaf = get_cacheinfo(level, type);
+
+ return this_leaf ? this_leaf->size : 0;
+}
+
+uintptr_t get_cache_geometry(u32 level, enum cache_type type)
+{
+ struct cacheinfo *this_leaf = get_cacheinfo(level, type);
+
+ return this_leaf ? (this_leaf->ways_of_associativity << 16 |
+ this_leaf->coherency_line_size) :
+ 0;
+}
+
+static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
+ unsigned int level, unsigned int size,
+ unsigned int sets, unsigned int line_size)
{
this_leaf->level = level;
this_leaf->type = type;
+ this_leaf->size = size;
+ this_leaf->number_of_sets = sets;
+ this_leaf->coherency_line_size = line_size;
+
+ /*
+ * If the cache is fully associative, there is no need to
+ * check the other properties.
+ */
+ if (sets == 1)
+ return;
+
+ /*
+ * Set the ways number for n-ways associative, make sure
+ * all properties are big than zero.
+ */
+ if (sets > 0 && size > 0 && line_size > 0)
+ this_leaf->ways_of_associativity = (size / sets) / line_size;
+}
+
+static void fill_cacheinfo(struct cacheinfo **this_leaf,
+ struct device_node *node, unsigned int level)
+{
+ unsigned int size, sets, line_size;
+
+ if (!of_property_read_u32(node, "cache-size", &size) &&
+ !of_property_read_u32(node, "cache-block-size", &line_size) &&
+ !of_property_read_u32(node, "cache-sets", &sets)) {
+ ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size);
+ }
+
+ if (!of_property_read_u32(node, "i-cache-size", &size) &&
+ !of_property_read_u32(node, "i-cache-sets", &sets) &&
+ !of_property_read_u32(node, "i-cache-block-size", &line_size)) {
+ ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size);
+ }
+
+ if (!of_property_read_u32(node, "d-cache-size", &size) &&
+ !of_property_read_u32(node, "d-cache-sets", &sets) &&
+ !of_property_read_u32(node, "d-cache-block-size", &line_size)) {
+ ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size);
+ }
}
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct device_node *np = of_cpu_device_node_get(cpu);
@@ -75,7 +155,7 @@ static int __init_cache_level(unsigned int cpu)
return 0;
}
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *this_leaf = this_cpu_ci->info_list;
@@ -83,35 +163,27 @@ static int __populate_cache_leaves(unsigned int cpu)
struct device_node *prev = NULL;
int levels = 1, level = 1;
- if (of_property_read_bool(np, "cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
- if (of_property_read_bool(np, "i-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
- if (of_property_read_bool(np, "d-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+ /* Level 1 caches in cpu node */
+ fill_cacheinfo(&this_leaf, np, level);
+ /* Next level caches in cache nodes */
prev = np;
while ((np = of_find_next_cache_node(np))) {
of_node_put(prev);
prev = np;
+
if (!of_device_is_compatible(np, "cache"))
break;
if (of_property_read_u32(np, "cache-level", &level))
break;
if (level <= levels)
break;
- if (of_property_read_bool(np, "cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
- if (of_property_read_bool(np, "i-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
- if (of_property_read_bool(np, "d-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+
+ fill_cacheinfo(&this_leaf, np, level);
+
levels = level;
}
of_node_put(np);
return 0;
}
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/riscv/kernel/clint.c b/arch/riscv/kernel/clint.c
deleted file mode 100644
index 3647980d14c3..000000000000
--- a/arch/riscv/kernel/clint.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2019 Christoph Hellwig.
- */
-
-#include <linux/io.h>
-#include <linux/of_address.h>
-#include <linux/types.h>
-#include <asm/clint.h>
-#include <asm/csr.h>
-#include <asm/timex.h>
-#include <asm/smp.h>
-
-/*
- * This is the layout used by the SiFive clint, which is also shared by the qemu
- * virt platform, and the Kendryte KD210 at least.
- */
-#define CLINT_IPI_OFF 0
-#define CLINT_TIME_CMP_OFF 0x4000
-#define CLINT_TIME_VAL_OFF 0xbff8
-
-u32 __iomem *clint_ipi_base;
-
-void clint_init_boot_cpu(void)
-{
- struct device_node *np;
- void __iomem *base;
-
- np = of_find_compatible_node(NULL, NULL, "riscv,clint0");
- if (!np) {
- panic("clint not found");
- return;
- }
-
- base = of_iomap(np, 0);
- if (!base)
- panic("could not map CLINT");
-
- clint_ipi_base = base + CLINT_IPI_OFF;
- riscv_time_cmp = base + CLINT_TIME_CMP_OFF;
- riscv_time_val = base + CLINT_TIME_VAL_OFF;
-
- clint_clear_ipi(boot_cpu_hartid);
-}
diff --git a/arch/riscv/kernel/compat_signal.c b/arch/riscv/kernel/compat_signal.c
new file mode 100644
index 000000000000..6ec4e34255a9
--- /dev/null
+++ b/arch/riscv/kernel/compat_signal.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/compat.h>
+#include <linux/signal.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/linkage.h>
+
+#include <asm/csr.h>
+#include <asm/signal32.h>
+#include <asm/switch_to.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+
+#define COMPAT_DEBUG_SIG 0
+
+struct compat_sigcontext {
+ struct compat_user_regs_struct sc_regs;
+ union __riscv_fp_state sc_fpregs;
+};
+
+struct compat_ucontext {
+ compat_ulong_t uc_flags;
+ struct compat_ucontext *uc_link;
+ compat_stack_t uc_stack;
+ sigset_t uc_sigmask;
+ /* There's some padding here to allow sigset_t to be expanded in the
+ * future. Though this is unlikely, other architectures put uc_sigmask
+ * at the end of this structure and explicitly state it can be
+ * expanded, so we didn't want to box ourselves in here. */
+ __u8 __unused[1024 / 8 - sizeof(sigset_t)];
+ /* We can't put uc_sigmask at the end of this structure because we need
+ * to be able to expand sigcontext in the future. For example, the
+ * vector ISA extension will almost certainly add ISA state. We want
+ * to ensure all user-visible ISA state can be saved and restored via a
+ * ucontext, so we're putting this at the end in order to allow for
+ * infinite extensibility. Since we know this will be extended and we
+ * assume sigset_t won't be extended an extreme amount, we're
+ * prioritizing this. */
+ struct compat_sigcontext uc_mcontext;
+};
+
+struct compat_rt_sigframe {
+ struct compat_siginfo info;
+ struct compat_ucontext uc;
+};
+
+#ifdef CONFIG_FPU
+static long compat_restore_fp_state(struct pt_regs *regs,
+ union __riscv_fp_state __user *sc_fpregs)
+{
+ long err;
+ struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
+ size_t i;
+
+ err = __copy_from_user(&current->thread.fstate, state, sizeof(*state));
+ if (unlikely(err))
+ return err;
+
+ fstate_restore(current, regs);
+
+ /* We support no other extension state at this time. */
+ for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
+ u32 value;
+
+ err = __get_user(value, &sc_fpregs->q.reserved[i]);
+ if (unlikely(err))
+ break;
+ if (value != 0)
+ return -EINVAL;
+ }
+
+ return err;
+}
+
+static long compat_save_fp_state(struct pt_regs *regs,
+ union __riscv_fp_state __user *sc_fpregs)
+{
+ long err;
+ struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
+ size_t i;
+
+ fstate_save(current, regs);
+ err = __copy_to_user(state, &current->thread.fstate, sizeof(*state));
+ if (unlikely(err))
+ return err;
+
+ /* We support no other extension state at this time. */
+ for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
+ err = __put_user(0, &sc_fpregs->q.reserved[i]);
+ if (unlikely(err))
+ break;
+ }
+
+ return err;
+}
+#else
+#define compat_save_fp_state(task, regs) (0)
+#define compat_restore_fp_state(task, regs) (0)
+#endif
+
+static long compat_restore_sigcontext(struct pt_regs *regs,
+ struct compat_sigcontext __user *sc)
+{
+ long err;
+ struct compat_user_regs_struct cregs;
+
+ /* sc_regs is structured the same as the start of pt_regs */
+ err = __copy_from_user(&cregs, &sc->sc_regs, sizeof(sc->sc_regs));
+
+ cregs_to_regs(&cregs, regs);
+
+ /* Restore the floating-point state. */
+ if (has_fpu())
+ err |= compat_restore_fp_state(regs, &sc->sc_fpregs);
+ return err;
+}
+
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
+{
+ struct pt_regs *regs = current_pt_regs();
+ struct compat_rt_sigframe __user *frame;
+ struct task_struct *task;
+ sigset_t set;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current->restart_block.fn = do_no_restart_syscall;
+
+ frame = (struct compat_rt_sigframe __user *)regs->sp;
+
+ if (!access_ok(frame, sizeof(*frame)))
+ goto badframe;
+
+ if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ goto badframe;
+
+ set_current_blocked(&set);
+
+ if (compat_restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ goto badframe;
+
+ if (compat_restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+
+ return regs->a0;
+
+badframe:
+ task = current;
+ if (show_unhandled_signals) {
+ pr_info_ratelimited(
+ "%s[%d]: bad frame in %s: frame=%p pc=%p sp=%p\n",
+ task->comm, task_pid_nr(task), __func__,
+ frame, (void *)regs->epc, (void *)regs->sp);
+ }
+ force_sig(SIGSEGV);
+ return 0;
+}
+
+static long compat_setup_sigcontext(struct compat_rt_sigframe __user *frame,
+ struct pt_regs *regs)
+{
+ struct compat_sigcontext __user *sc = &frame->uc.uc_mcontext;
+ struct compat_user_regs_struct cregs;
+ long err;
+
+ regs_to_cregs(&cregs, regs);
+
+ /* sc_regs is structured the same as the start of pt_regs */
+ err = __copy_to_user(&sc->sc_regs, &cregs, sizeof(sc->sc_regs));
+ /* Save the floating-point state. */
+ if (has_fpu())
+ err |= compat_save_fp_state(regs, &sc->sc_fpregs);
+ return err;
+}
+
+static inline void __user *compat_get_sigframe(struct ksignal *ksig,
+ struct pt_regs *regs, size_t framesize)
+{
+ unsigned long sp;
+ /* Default to using normal stack */
+ sp = regs->sp;
+
+ /*
+ * If we are on the alternate signal stack and would overflow it, don't.
+ * Return an always-bogus address instead so we will die with SIGSEGV.
+ */
+ if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
+ return (void __user __force *)(-1UL);
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ sp = sigsp(sp, ksig) - framesize;
+
+ /* Align the stack frame. */
+ sp &= ~0xfUL;
+
+ return (void __user *)sp;
+}
+
+int compat_setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
+{
+ struct compat_rt_sigframe __user *frame;
+ long err = 0;
+
+ frame = compat_get_sigframe(ksig, regs, sizeof(*frame));
+ if (!access_ok(frame, sizeof(*frame)))
+ return -EFAULT;
+
+ err |= copy_siginfo_to_user32(&frame->info, &ksig->info);
+
+ /* Create the ucontext. */
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(NULL, &frame->uc.uc_link);
+ err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
+ err |= compat_setup_sigcontext(frame, regs);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ return -EFAULT;
+
+ regs->ra = (unsigned long)COMPAT_VDSO_SYMBOL(
+ current->mm->context.vdso, rt_sigreturn);
+
+ /*
+ * Set up registers for signal handler.
+ * Registers that we don't modify keep the value they had from
+ * user-space at the time we took the signal.
+ * We always pass siginfo and mcontext, regardless of SA_SIGINFO,
+ * since some things rely on this (e.g. glibc's debug/segfault.c).
+ */
+ regs->epc = (unsigned long)ksig->ka.sa.sa_handler;
+ regs->sp = (unsigned long)frame;
+ regs->a0 = ksig->sig; /* a0: signal number */
+ regs->a1 = (unsigned long)(&frame->info); /* a1: siginfo pointer */
+ regs->a2 = (unsigned long)(&frame->uc); /* a2: ucontext pointer */
+
+#if COMPAT_DEBUG_SIG
+ pr_info("SIG deliver (%s:%d): sig=%d pc=%p ra=%p sp=%p\n",
+ current->comm, task_pid_nr(current), ksig->sig,
+ (void *)regs->epc, (void *)regs->ra, frame);
+#endif
+
+ return 0;
+}
diff --git a/arch/riscv/kernel/compat_syscall_table.c b/arch/riscv/kernel/compat_syscall_table.c
new file mode 100644
index 000000000000..651f2b009c28
--- /dev/null
+++ b/arch/riscv/kernel/compat_syscall_table.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define __SYSCALL_COMPAT
+
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <asm-generic/mman-common.h>
+#include <asm-generic/syscalls.h>
+#include <asm/syscall.h>
+
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+asmlinkage long compat_sys_rt_sigreturn(void);
+
+void * const compat_sys_call_table[__NR_syscalls] = {
+ [0 ... __NR_syscalls - 1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/riscv/kernel/compat_vdso/.gitignore b/arch/riscv/kernel/compat_vdso/.gitignore
new file mode 100644
index 000000000000..19d83d846c1e
--- /dev/null
+++ b/arch/riscv/kernel/compat_vdso/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+compat_vdso.lds
diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile
new file mode 100644
index 000000000000..260daf3236d3
--- /dev/null
+++ b/arch/riscv/kernel/compat_vdso/Makefile
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for compat_vdso
+#
+
+# Symbols present in the compat_vdso
+compat_vdso-syms = rt_sigreturn
+compat_vdso-syms += getcpu
+compat_vdso-syms += flush_icache
+
+COMPAT_CC := $(CC)
+COMPAT_LD := $(LD)
+
+COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32
+COMPAT_LD_FLAGS := -melf32lriscv
+
+# Files to link into the compat_vdso
+obj-compat_vdso = $(patsubst %, %.o, $(compat_vdso-syms)) note.o
+
+# Build rules
+targets := $(obj-compat_vdso) compat_vdso.so compat_vdso.so.dbg compat_vdso.lds
+obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso))
+
+obj-y += compat_vdso.o
+CPPFLAGS_compat_vdso.lds += -P -C -U$(ARCH)
+
+# Disable profiling and instrumentation for VDSO code
+GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+
+# Force dependency
+$(obj)/compat_vdso.o: $(obj)/compat_vdso.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/compat_vdso.so.dbg: $(obj)/compat_vdso.lds $(obj-compat_vdso) FORCE
+ $(call if_changed,compat_vdsold)
+LDFLAGS_compat_vdso.so.dbg = -shared -S -soname=linux-compat_vdso.so.1 \
+ --build-id=sha1 --hash-style=both --eh-frame-hdr
+
+$(obj-compat_vdso): %.o: %.S FORCE
+ $(call if_changed_dep,compat_vdsoas)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# Generate VDSO offsets using helper script
+gen-compat_vdsosym := $(srctree)/$(src)/gen_compat_vdso_offsets.sh
+quiet_cmd_compat_vdsosym = VDSOSYM $@
+ cmd_compat_vdsosym = $(NM) $< | $(gen-compat_vdsosym) | LC_ALL=C sort > $@
+
+include/generated/compat_vdso-offsets.h: $(obj)/compat_vdso.so.dbg FORCE
+ $(call if_changed,compat_vdsosym)
+
+# actual build commands
+# The DSO images are built using a special linker script
+# Make sure only to export the intended __compat_vdso_xxx symbol offsets.
+quiet_cmd_compat_vdsold = VDSOLD $@
+ cmd_compat_vdsold = $(COMPAT_LD) $(ld_flags) $(COMPAT_LD_FLAGS) -T $(filter-out FORCE,$^) -o $@.tmp && \
+ $(OBJCOPY) $(patsubst %, -G __compat_vdso_%, $(compat_vdso-syms)) $@.tmp $@ && \
+ rm $@.tmp
+
+# actual build commands
+quiet_cmd_compat_vdsoas = VDSOAS $@
+ cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_compat_vdso_install = INSTALL $@
+ cmd_compat_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/compat_vdso/$@
+
+compat_vdso.so: $(obj)/compat_vdso.so.dbg
+ @mkdir -p $(MODLIB)/compat_vdso
+ $(call cmd,compat_vdso_install)
+
+compat_vdso_install: compat_vdso.so
diff --git a/arch/riscv/kernel/compat_vdso/compat_vdso.S b/arch/riscv/kernel/compat_vdso/compat_vdso.S
new file mode 100644
index 000000000000..ffd66237e091
--- /dev/null
+++ b/arch/riscv/kernel/compat_vdso/compat_vdso.S
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#define vdso_start compat_vdso_start
+#define vdso_end compat_vdso_end
+
+#define __VDSO_PATH "arch/riscv/kernel/compat_vdso/compat_vdso.so"
+
+#include "../vdso/vdso.S"
diff --git a/arch/riscv/kernel/compat_vdso/compat_vdso.lds.S b/arch/riscv/kernel/compat_vdso/compat_vdso.lds.S
new file mode 100644
index 000000000000..c7c9355d311e
--- /dev/null
+++ b/arch/riscv/kernel/compat_vdso/compat_vdso.lds.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include "../vdso/vdso.lds.S"
diff --git a/arch/riscv/kernel/compat_vdso/flush_icache.S b/arch/riscv/kernel/compat_vdso/flush_icache.S
new file mode 100644
index 000000000000..523dd8b96045
--- /dev/null
+++ b/arch/riscv/kernel/compat_vdso/flush_icache.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include "../vdso/flush_icache.S"
diff --git a/arch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh b/arch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh
new file mode 100755
index 000000000000..8ac070c783b3
--- /dev/null
+++ b/arch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+LC_ALL=C
+sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . \(__vdso_[a-zA-Z0-9_]*\)$/\#define compat\2_offset\t0x\1/p'
diff --git a/arch/riscv/kernel/compat_vdso/getcpu.S b/arch/riscv/kernel/compat_vdso/getcpu.S
new file mode 100644
index 000000000000..10f463efe271
--- /dev/null
+++ b/arch/riscv/kernel/compat_vdso/getcpu.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include "../vdso/getcpu.S"
diff --git a/arch/riscv/kernel/compat_vdso/note.S b/arch/riscv/kernel/compat_vdso/note.S
new file mode 100644
index 000000000000..b10312907542
--- /dev/null
+++ b/arch/riscv/kernel/compat_vdso/note.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include "../vdso/note.S"
diff --git a/arch/riscv/kernel/compat_vdso/rt_sigreturn.S b/arch/riscv/kernel/compat_vdso/rt_sigreturn.S
new file mode 100644
index 000000000000..884aada4facc
--- /dev/null
+++ b/arch/riscv/kernel/compat_vdso/rt_sigreturn.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include "../vdso/rt_sigreturn.S"
diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
index df84e0c13db1..f7a832e3a1d1 100644
--- a/arch/riscv/kernel/cpu-hotplug.c
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -12,14 +12,9 @@
#include <linux/sched/hotplug.h>
#include <asm/irq.h>
#include <asm/cpu_ops.h>
+#include <asm/numa.h>
#include <asm/sbi.h>
-void cpu_stop(void);
-void arch_cpu_idle_dead(void)
-{
- cpu_stop();
-}
-
bool cpu_has_hotplug(unsigned int cpu)
{
if (cpu_ops[cpu]->cpu_stop)
@@ -46,6 +41,7 @@ int __cpu_disable(void)
return ret;
remove_cpu_topology(cpu);
+ numa_remove_cpu(cpu);
set_cpu_online(cpu, false);
irq_migrate_all_off_this_cpu();
@@ -75,7 +71,7 @@ void __cpu_die(unsigned int cpu)
/*
* Called from the idle thread for the CPU which has been shutdown.
*/
-void cpu_stop(void)
+void arch_cpu_idle_dead(void)
{
idle_task_exit();
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 6d59e6906fdd..fba9e9f46a8c 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -6,7 +6,9 @@
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/of.h>
+#include <asm/hwcap.h>
#include <asm/smp.h>
+#include <asm/pgtable.h>
/*
* Returns the hart ID of the given device tree node, or -ENODEV if the node
@@ -22,7 +24,8 @@ int riscv_of_processor_hartid(struct device_node *node)
return -ENODEV;
}
- if (of_property_read_u32(node, "reg", &hart)) {
+ hart = of_get_cpu_hwid(node, 0);
+ if (hart == ~0U) {
pr_warn("Found CPU without hart ID\n");
return -ENODEV;
}
@@ -61,27 +64,96 @@ int riscv_of_parent_hartid(struct device_node *node)
}
#ifdef CONFIG_PROC_FS
+#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \
+ { \
+ .uprop = #UPROP, \
+ .isa_ext_id = EXTID, \
+ }
+/*
+ * Here are the ordering rules of extension naming defined by RISC-V
+ * specification :
+ * 1. All extensions should be separated from other multi-letter extensions
+ * by an underscore.
+ * 2. The first letter following the 'Z' conventionally indicates the most
+ * closely related alphabetical extension category, IMAFDQLCBKJTPVH.
+ * If multiple 'Z' extensions are named, they should be ordered first
+ * by category, then alphabetically within a category.
+ * 3. Standard supervisor-level extensions (starts with 'S') should be
+ * listed after standard unprivileged extensions. If multiple
+ * supervisor-level extensions are listed, they should be ordered
+ * alphabetically.
+ * 4. Non-standard extensions (starts with 'X') must be listed after all
+ * standard extensions. They must be separated from other multi-letter
+ * extensions by an underscore.
+ */
+static struct riscv_isa_ext_data isa_ext_arr[] = {
+ __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
+ __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+ __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
+};
+
+static void print_isa_ext(struct seq_file *f)
+{
+ struct riscv_isa_ext_data *edata;
+ int i = 0, arr_sz;
+
+ arr_sz = ARRAY_SIZE(isa_ext_arr) - 1;
+
+ /* No extension support available */
+ if (arr_sz <= 0)
+ return;
+
+ for (i = 0; i <= arr_sz; i++) {
+ edata = &isa_ext_arr[i];
+ if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id))
+ continue;
+ seq_printf(f, "_%s", edata->uprop);
+ }
+}
+
+/*
+ * These are the only valid base (single letter) ISA extensions as per the spec.
+ * It also specifies the canonical order in which it appears in the spec.
+ * Some of the extension may just be a place holder for now (B, K, P, J).
+ * This should be updated once corresponding extensions are ratified.
+ */
+static const char base_riscv_exts[13] = "imafdqcbkjpvh";
static void print_isa(struct seq_file *f, const char *isa)
{
- /* Print the entire ISA as it is */
+ int i;
+
seq_puts(f, "isa\t\t: ");
- seq_write(f, isa, strlen(isa));
+ /* Print the rv[64/32] part */
+ seq_write(f, isa, 4);
+ for (i = 0; i < sizeof(base_riscv_exts); i++) {
+ if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a'))
+ /* Print only enabled the base ISA extensions */
+ seq_write(f, &base_riscv_exts[i], 1);
+ }
+ print_isa_ext(f);
seq_puts(f, "\n");
}
-static void print_mmu(struct seq_file *f, const char *mmu_type)
+static void print_mmu(struct seq_file *f)
{
+ char sv_type[16];
+
+#ifdef CONFIG_MMU
#if defined(CONFIG_32BIT)
- if (strcmp(mmu_type, "riscv,sv32") != 0)
- return;
+ strncpy(sv_type, "sv32", 5);
#elif defined(CONFIG_64BIT)
- if (strcmp(mmu_type, "riscv,sv39") != 0 &&
- strcmp(mmu_type, "riscv,sv48") != 0)
- return;
+ if (pgtable_l5_enabled)
+ strncpy(sv_type, "sv57", 5);
+ else if (pgtable_l4_enabled)
+ strncpy(sv_type, "sv48", 5);
+ else
+ strncpy(sv_type, "sv39", 5);
#endif
-
- seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
+#else
+ strncpy(sv_type, "none", 5);
+#endif /* CONFIG_MMU */
+ seq_printf(f, "mmu\t\t: %s\n", sv_type);
}
static void *c_start(struct seq_file *m, loff_t *pos)
@@ -106,14 +178,13 @@ static int c_show(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
struct device_node *node = of_get_cpu_node(cpu_id, NULL);
- const char *compat, *isa, *mmu;
+ const char *compat, *isa;
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
if (!of_property_read_string(node, "riscv,isa", &isa))
print_isa(m, isa);
- if (!of_property_read_string(node, "mmu-type", &mmu))
- print_mmu(m, mmu);
+ print_mmu(m);
if (!of_property_read_string(node, "compatible", &compat)
&& strcmp(compat, "riscv"))
seq_printf(m, "uarch\t\t: %s\n", compat);
diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c
index 0ec22354018c..170d07e57721 100644
--- a/arch/riscv/kernel/cpu_ops.c
+++ b/arch/riscv/kernel/cpu_ops.c
@@ -8,37 +8,29 @@
#include <linux/of.h>
#include <linux/string.h>
#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-void *__cpu_up_stack_pointer[NR_CPUS] __section(.data);
-void *__cpu_up_task_pointer[NR_CPUS] __section(.data);
-
extern const struct cpu_operations cpu_ops_sbi;
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
extern const struct cpu_operations cpu_ops_spinwait;
-
-void cpu_update_secondary_bootdata(unsigned int cpuid,
- struct task_struct *tidle)
-{
- int hartid = cpuid_to_hartid_map(cpuid);
-
- /* Make sure tidle is updated */
- smp_mb();
- WRITE_ONCE(__cpu_up_stack_pointer[hartid],
- task_stack_page(tidle) + THREAD_SIZE);
- WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
-}
+#else
+const struct cpu_operations cpu_ops_spinwait = {
+ .name = "",
+ .cpu_prepare = NULL,
+ .cpu_start = NULL,
+};
+#endif
void __init cpu_set_ops(int cpuid)
{
#if IS_ENABLED(CONFIG_RISCV_SBI)
if (sbi_probe_extension(SBI_EXT_HSM) > 0) {
if (!cpuid)
- pr_info("SBI v0.2 HSM extension detected\n");
+ pr_info("SBI HSM extension detected\n");
cpu_ops[cpuid] = &cpu_ops_sbi;
} else
#endif
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index 685fae72b7f5..4f5a6f84e2a4 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -7,13 +7,22 @@
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
+#include <asm/cpu_ops_sbi.h>
#include <asm/sbi.h>
#include <asm/smp.h>
extern char secondary_start_sbi[];
const struct cpu_operations cpu_ops_sbi;
+/*
+ * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can
+ * be invoked from multiple threads in parallel. Define a per cpu data
+ * to handle that.
+ */
+static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
+
static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
unsigned long priv)
{
@@ -55,14 +64,19 @@ static int sbi_hsm_hart_get_status(unsigned long hartid)
static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
{
- int rc;
unsigned long boot_addr = __pa_symbol(secondary_start_sbi);
int hartid = cpuid_to_hartid_map(cpuid);
-
- cpu_update_secondary_bootdata(cpuid, tidle);
- rc = sbi_hsm_hart_start(hartid, boot_addr, 0);
-
- return rc;
+ unsigned long hsm_data;
+ struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid);
+
+ /* Make sure tidle is updated */
+ smp_mb();
+ bdata->task_ptr = tidle;
+ bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+ /* Make sure boot data is updated */
+ smp_mb();
+ hsm_data = __pa(bdata);
+ return sbi_hsm_hart_start(hartid, boot_addr, hsm_data);
}
static int sbi_cpu_prepare(unsigned int cpuid)
@@ -97,7 +111,7 @@ static int sbi_cpu_is_stopped(unsigned int cpuid)
rc = sbi_hsm_hart_get_status(hartid);
- if (rc == SBI_HSM_HART_STATUS_STOPPED)
+ if (rc == SBI_HSM_STATE_STOPPED)
return 0;
return rc;
}
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index b2c957bb68c1..346847f6c41c 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -6,11 +6,36 @@
#include <linux/errno.h>
#include <linux/of.h>
#include <linux/string.h>
+#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations cpu_ops_spinwait;
+void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data");
+void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data");
+
+static void cpu_update_secondary_bootdata(unsigned int cpuid,
+ struct task_struct *tidle)
+{
+ int hartid = cpuid_to_hartid_map(cpuid);
+
+ /*
+ * The hartid must be less than NR_CPUS to avoid out-of-bound access
+ * errors for __cpu_spinwait_stack/task_pointer. That is not always possible
+ * for platforms with discontiguous hartid numbering scheme. That's why
+ * spinwait booting is not the recommended approach for any platforms
+ * booting Linux in S-mode and can be disabled in the future.
+ */
+ if (hartid == INVALID_HARTID || hartid >= NR_CPUS)
+ return;
+
+ /* Make sure tidle is updated */
+ smp_mb();
+ WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
+ task_stack_page(tidle) + THREAD_SIZE);
+ WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
+}
static int spinwait_cpu_prepare(unsigned int cpuid)
{
@@ -28,7 +53,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle)
* selects the first cpu to boot the kernel and causes the remainder
* of the cpus to spin in a loop waiting for their stack pointer to be
* setup by that main cpu. Writing to bootdata
- * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they
+ * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they
* can continue the boot process.
*/
cpu_update_secondary_bootdata(cpuid, tidle);
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index ac202f44a670..12b05ce164bb 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -7,19 +7,28 @@
*/
#include <linux/bitmap.h>
+#include <linux/ctype.h>
+#include <linux/libfdt.h>
+#include <linux/module.h>
#include <linux/of.h>
-#include <asm/processor.h>
+#include <asm/alternative.h>
+#include <asm/errata_list.h>
#include <asm/hwcap.h>
+#include <asm/patch.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
#include <asm/smp.h>
#include <asm/switch_to.h>
+#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
+
unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */
static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
#ifdef CONFIG_FPU
-bool has_fpu __read_mostly;
+__ro_after_init DEFINE_STATIC_KEY_FALSE(cpu_hwcap_fpu);
#endif
/**
@@ -59,12 +68,12 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
}
EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
-void riscv_fill_hwcap(void)
+void __init riscv_fill_hwcap(void)
{
struct device_node *node;
const char *isa;
- char print_str[BITS_PER_LONG + 1];
- size_t i, j, isa_len;
+ char print_str[NUM_ALPHA_EXTS + 1];
+ int i, j;
static unsigned long isa2hwcap[256] = {0};
isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@@ -80,7 +89,8 @@ void riscv_fill_hwcap(void)
for_each_of_cpu_node(node) {
unsigned long this_hwcap = 0;
- unsigned long this_isa = 0;
+ DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX);
+ const char *temp;
if (riscv_of_processor_hartid(node) < 0)
continue;
@@ -90,23 +100,107 @@ void riscv_fill_hwcap(void)
continue;
}
- i = 0;
- isa_len = strlen(isa);
+ temp = isa;
#if IS_ENABLED(CONFIG_32BIT)
if (!strncmp(isa, "rv32", 4))
- i += 4;
+ isa += 4;
#elif IS_ENABLED(CONFIG_64BIT)
if (!strncmp(isa, "rv64", 4))
- i += 4;
+ isa += 4;
#endif
- for (; i < isa_len; ++i) {
- this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
- /*
- * TODO: X, Y and Z extension parsing for Host ISA
- * bitmap will be added in-future.
- */
- if ('a' <= isa[i] && isa[i] < 'x')
- this_isa |= (1UL << (isa[i] - 'a'));
+ /* The riscv,isa DT property must start with rv64 or rv32 */
+ if (temp == isa)
+ continue;
+ bitmap_zero(this_isa, RISCV_ISA_EXT_MAX);
+ for (; *isa; ++isa) {
+ const char *ext = isa++;
+ const char *ext_end = isa;
+ bool ext_long = false, ext_err = false;
+
+ switch (*ext) {
+ case 's':
+ /**
+ * Workaround for invalid single-letter 's' & 'u'(QEMU).
+ * No need to set the bit in riscv_isa as 's' & 'u' are
+ * not valid ISA extensions. It works until multi-letter
+ * extension starting with "Su" appears.
+ */
+ if (ext[-1] != '_' && ext[1] == 'u') {
+ ++isa;
+ ext_err = true;
+ break;
+ }
+ fallthrough;
+ case 'x':
+ case 'z':
+ ext_long = true;
+ /* Multi-letter extension must be delimited */
+ for (; *isa && *isa != '_'; ++isa)
+ if (unlikely(!islower(*isa)
+ && !isdigit(*isa)))
+ ext_err = true;
+ /* Parse backwards */
+ ext_end = isa;
+ if (unlikely(ext_err))
+ break;
+ if (!isdigit(ext_end[-1]))
+ break;
+ /* Skip the minor version */
+ while (isdigit(*--ext_end))
+ ;
+ if (ext_end[0] != 'p'
+ || !isdigit(ext_end[-1])) {
+ /* Advance it to offset the pre-decrement */
+ ++ext_end;
+ break;
+ }
+ /* Skip the major version */
+ while (isdigit(*--ext_end))
+ ;
+ ++ext_end;
+ break;
+ default:
+ if (unlikely(!islower(*ext))) {
+ ext_err = true;
+ break;
+ }
+ /* Find next extension */
+ if (!isdigit(*isa))
+ break;
+ /* Skip the minor version */
+ while (isdigit(*++isa))
+ ;
+ if (*isa != 'p')
+ break;
+ if (!isdigit(*++isa)) {
+ --isa;
+ break;
+ }
+ /* Skip the major version */
+ while (isdigit(*++isa))
+ ;
+ break;
+ }
+ if (*isa != '_')
+ --isa;
+
+#define SET_ISA_EXT_MAP(name, bit) \
+ do { \
+ if ((ext_end - ext == sizeof(name) - 1) && \
+ !memcmp(ext, name, sizeof(name) - 1)) \
+ set_bit(bit, this_isa); \
+ } while (false) \
+
+ if (unlikely(ext_err))
+ continue;
+ if (!ext_long) {
+ this_hwcap |= isa2hwcap[(unsigned char)(*ext)];
+ set_bit(*ext - 'a', this_isa);
+ } else {
+ SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
+ SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
+ }
+#undef SET_ISA_EXT_MAP
}
/*
@@ -119,10 +213,10 @@ void riscv_fill_hwcap(void)
else
elf_hwcap = this_hwcap;
- if (riscv_isa[0])
- riscv_isa[0] &= this_isa;
+ if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX))
+ bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
else
- riscv_isa[0] = this_isa;
+ bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
}
/* We don't support systems with F but without D, so mask those out
@@ -133,19 +227,87 @@ void riscv_fill_hwcap(void)
}
memset(print_str, 0, sizeof(print_str));
- for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
if (riscv_isa[0] & BIT_MASK(i))
print_str[j++] = (char)('a' + i);
- pr_info("riscv: ISA extensions %s\n", print_str);
+ pr_info("riscv: base ISA extensions %s\n", print_str);
memset(print_str, 0, sizeof(print_str));
- for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
if (elf_hwcap & BIT_MASK(i))
print_str[j++] = (char)('a' + i);
pr_info("riscv: ELF capabilities %s\n", print_str);
#ifdef CONFIG_FPU
if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
- has_fpu = true;
+ static_branch_enable(&cpu_hwcap_fpu);
#endif
}
+
+#ifdef CONFIG_RISCV_ALTERNATIVE
+struct cpufeature_info {
+ char name[ERRATA_STRING_LENGTH_MAX];
+ bool (*check_func)(unsigned int stage);
+};
+
+static bool __init_or_module cpufeature_svpbmt_check_func(unsigned int stage)
+{
+#ifdef CONFIG_RISCV_ISA_SVPBMT
+ switch (stage) {
+ case RISCV_ALTERNATIVES_EARLY_BOOT:
+ return false;
+ default:
+ return riscv_isa_extension_available(NULL, SVPBMT);
+ }
+#endif
+
+ return false;
+}
+
+static const struct cpufeature_info __initdata_or_module
+cpufeature_list[CPUFEATURE_NUMBER] = {
+ {
+ .name = "svpbmt",
+ .check_func = cpufeature_svpbmt_check_func
+ },
+};
+
+static u32 __init_or_module cpufeature_probe(unsigned int stage)
+{
+ const struct cpufeature_info *info;
+ u32 cpu_req_feature = 0;
+ int idx;
+
+ for (idx = 0; idx < CPUFEATURE_NUMBER; idx++) {
+ info = &cpufeature_list[idx];
+
+ if (info->check_func(stage))
+ cpu_req_feature |= (1U << idx);
+ }
+
+ return cpu_req_feature;
+}
+
+void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
+ struct alt_entry *end,
+ unsigned int stage)
+{
+ u32 cpu_req_feature = cpufeature_probe(stage);
+ struct alt_entry *alt;
+ u32 tmp;
+
+ for (alt = begin; alt < end; alt++) {
+ if (alt->vendor_id != 0)
+ continue;
+ if (alt->errata_id >= CPUFEATURE_NUMBER) {
+ WARN(1, "This feature id:%d is not in kernel cpufeature list",
+ alt->errata_id);
+ continue;
+ }
+
+ tmp = (1U << alt->errata_id);
+ if (cpu_req_feature & tmp)
+ patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
+ }
+}
+#endif
diff --git a/arch/riscv/kernel/crash_dump.c b/arch/riscv/kernel/crash_dump.c
new file mode 100644
index 000000000000..ea2158cee97b
--- /dev/null
+++ b/arch/riscv/kernel/crash_dump.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code comes from arch/arm64/kernel/crash_dump.c
+ * Created by: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ * Copyright (C) 2017 Linaro Limited
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+ size_t csize, unsigned long offset)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+
+ vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+ if (!vaddr)
+ return -ENOMEM;
+
+ csize = copy_to_iter(vaddr + offset, csize, iter);
+
+ memunmap(vaddr);
+ return csize;
+}
diff --git a/arch/riscv/kernel/crash_save_regs.S b/arch/riscv/kernel/crash_save_regs.S
new file mode 100644
index 000000000000..7832fb763aba
--- /dev/null
+++ b/arch/riscv/kernel/crash_save_regs.S
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
+ */
+
+#include <asm/asm.h> /* For RISCV_* and REG_* macros */
+#include <asm/csr.h> /* For CSR_* macros */
+#include <asm/asm-offsets.h> /* For offsets on pt_regs */
+#include <linux/linkage.h> /* For SYM_* macros */
+
+.section ".text"
+SYM_CODE_START(riscv_crash_save_regs)
+ REG_S ra, PT_RA(a0) /* x1 */
+ REG_S sp, PT_SP(a0) /* x2 */
+ REG_S gp, PT_GP(a0) /* x3 */
+ REG_S tp, PT_TP(a0) /* x4 */
+ REG_S t0, PT_T0(a0) /* x5 */
+ REG_S t1, PT_T1(a0) /* x6 */
+ REG_S t2, PT_T2(a0) /* x7 */
+ REG_S s0, PT_S0(a0) /* x8/fp */
+ REG_S s1, PT_S1(a0) /* x9 */
+ REG_S a0, PT_A0(a0) /* x10 */
+ REG_S a1, PT_A1(a0) /* x11 */
+ REG_S a2, PT_A2(a0) /* x12 */
+ REG_S a3, PT_A3(a0) /* x13 */
+ REG_S a4, PT_A4(a0) /* x14 */
+ REG_S a5, PT_A5(a0) /* x15 */
+ REG_S a6, PT_A6(a0) /* x16 */
+ REG_S a7, PT_A7(a0) /* x17 */
+ REG_S s2, PT_S2(a0) /* x18 */
+ REG_S s3, PT_S3(a0) /* x19 */
+ REG_S s4, PT_S4(a0) /* x20 */
+ REG_S s5, PT_S5(a0) /* x21 */
+ REG_S s6, PT_S6(a0) /* x22 */
+ REG_S s7, PT_S7(a0) /* x23 */
+ REG_S s8, PT_S8(a0) /* x24 */
+ REG_S s9, PT_S9(a0) /* x25 */
+ REG_S s10, PT_S10(a0) /* x26 */
+ REG_S s11, PT_S11(a0) /* x27 */
+ REG_S t3, PT_T3(a0) /* x28 */
+ REG_S t4, PT_T4(a0) /* x29 */
+ REG_S t5, PT_T5(a0) /* x30 */
+ REG_S t6, PT_T6(a0) /* x31 */
+
+ csrr t1, CSR_STATUS
+ csrr t2, CSR_EPC
+ csrr t3, CSR_TVAL
+ csrr t4, CSR_CAUSE
+
+ REG_S t1, PT_STATUS(a0)
+ REG_S t2, PT_EPC(a0)
+ REG_S t3, PT_BADADDR(a0)
+ REG_S t4, PT_CAUSE(a0)
+ ret
+SYM_CODE_END(riscv_crash_save_regs)
diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
new file mode 100644
index 000000000000..8e733aa48ba6
--- /dev/null
+++ b/arch/riscv/kernel/efi-header.S
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ * Adapted from arch/arm64/kernel/efi-header.S
+ */
+
+#include <linux/pe.h>
+#include <linux/sizes.h>
+
+ .macro __EFI_PE_HEADER
+ .long PE_MAGIC
+coff_header:
+#ifdef CONFIG_64BIT
+ .short IMAGE_FILE_MACHINE_RISCV64 // Machine
+#else
+ .short IMAGE_FILE_MACHINE_RISCV32 // Machine
+#endif
+ .short section_count // NumberOfSections
+ .long 0 // TimeDateStamp
+ .long 0 // PointerToSymbolTable
+ .long 0 // NumberOfSymbols
+ .short section_table - optional_header // SizeOfOptionalHeader
+ .short IMAGE_FILE_DEBUG_STRIPPED | \
+ IMAGE_FILE_EXECUTABLE_IMAGE | \
+ IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics
+
+optional_header:
+#ifdef CONFIG_64BIT
+ .short PE_OPT_MAGIC_PE32PLUS // PE32+ format
+#else
+ .short PE_OPT_MAGIC_PE32 // PE32 format
+#endif
+ .byte 0x02 // MajorLinkerVersion
+ .byte 0x14 // MinorLinkerVersion
+ .long __pecoff_text_end - efi_header_end // SizeOfCode
+ .long __pecoff_data_virt_size // SizeOfInitializedData
+ .long 0 // SizeOfUninitializedData
+ .long __efistub_efi_pe_entry - _start // AddressOfEntryPoint
+ .long efi_header_end - _start // BaseOfCode
+#ifdef CONFIG_32BIT
+ .long __pecoff_text_end - _start // BaseOfData
+#endif
+
+extra_header_fields:
+ .quad 0 // ImageBase
+ .long PECOFF_SECTION_ALIGNMENT // SectionAlignment
+ .long PECOFF_FILE_ALIGNMENT // FileAlignment
+ .short 0 // MajorOperatingSystemVersion
+ .short 0 // MinorOperatingSystemVersion
+ .short LINUX_EFISTUB_MAJOR_VERSION // MajorImageVersion
+ .short LINUX_EFISTUB_MINOR_VERSION // MinorImageVersion
+ .short 0 // MajorSubsystemVersion
+ .short 0 // MinorSubsystemVersion
+ .long 0 // Win32VersionValue
+
+ .long _end - _start // SizeOfImage
+
+ // Everything before the kernel image is considered part of the header
+ .long efi_header_end - _start // SizeOfHeaders
+ .long 0 // CheckSum
+ .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
+ .short 0 // DllCharacteristics
+ .quad 0 // SizeOfStackReserve
+ .quad 0 // SizeOfStackCommit
+ .quad 0 // SizeOfHeapReserve
+ .quad 0 // SizeOfHeapCommit
+ .long 0 // LoaderFlags
+ .long (section_table - .) / 8 // NumberOfRvaAndSizes
+
+ .quad 0 // ExportTable
+ .quad 0 // ImportTable
+ .quad 0 // ResourceTable
+ .quad 0 // ExceptionTable
+ .quad 0 // CertificationTable
+ .quad 0 // BaseRelocationTable
+
+ // Section table
+section_table:
+ .ascii ".text\0\0\0"
+ .long __pecoff_text_end - efi_header_end // VirtualSize
+ .long efi_header_end - _start // VirtualAddress
+ .long __pecoff_text_end - efi_header_end // SizeOfRawData
+ .long efi_header_end - _start // PointerToRawData
+
+ .long 0 // PointerToRelocations
+ .long 0 // PointerToLineNumbers
+ .short 0 // NumberOfRelocations
+ .short 0 // NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_CODE | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_EXECUTE // Characteristics
+
+ .ascii ".data\0\0\0"
+ .long __pecoff_data_virt_size // VirtualSize
+ .long __pecoff_text_end - _start // VirtualAddress
+ .long __pecoff_data_raw_size // SizeOfRawData
+ .long __pecoff_text_end - _start // PointerToRawData
+
+ .long 0 // PointerToRelocations
+ .long 0 // PointerToLineNumbers
+ .short 0 // NumberOfRelocations
+ .short 0 // NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_INITIALIZED_DATA | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_WRITE // Characteristics
+
+ .set section_count, (. - section_table) / 40
+
+ .balign 0x1000
+efi_header_end:
+ .endm
diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c
new file mode 100644
index 000000000000..1aa540350abd
--- /dev/null
+++ b/arch/riscv/kernel/efi.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ * Adapted from arch/arm64/kernel/efi.c
+ */
+
+#include <linux/efi.h>
+#include <linux/init.h>
+
+#include <asm/efi.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-bits.h>
+
+/*
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable, everything else can be mapped with the XN bits
+ * set. Also take the new (optional) RO/XP bits into account.
+ */
+static __init pgprot_t efimem_to_pgprot_map(efi_memory_desc_t *md)
+{
+ u64 attr = md->attribute;
+ u32 type = md->type;
+
+ if (type == EFI_MEMORY_MAPPED_IO)
+ return PAGE_KERNEL;
+
+ /* R-- */
+ if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
+ (EFI_MEMORY_XP | EFI_MEMORY_RO))
+ return PAGE_KERNEL_READ;
+
+ /* R-X */
+ if (attr & EFI_MEMORY_RO)
+ return PAGE_KERNEL_READ_EXEC;
+
+ /* RW- */
+ if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) ==
+ EFI_MEMORY_XP) ||
+ type != EFI_RUNTIME_SERVICES_CODE)
+ return PAGE_KERNEL;
+
+ /* RWX */
+ return PAGE_KERNEL_EXEC;
+}
+
+int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
+{
+ pgprot_t prot = __pgprot(pgprot_val(efimem_to_pgprot_map(md)) &
+ ~(_PAGE_GLOBAL));
+ int i;
+
+ /* RISC-V maps one page at a time */
+ for (i = 0; i < md->num_pages; i++)
+ create_pgd_mapping(mm->pgd, md->virt_addr + i * PAGE_SIZE,
+ md->phys_addr + i * PAGE_SIZE,
+ PAGE_SIZE, prot);
+ return 0;
+}
+
+static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
+{
+ efi_memory_desc_t *md = data;
+ pte_t pte = READ_ONCE(*ptep);
+ unsigned long val;
+
+ if (md->attribute & EFI_MEMORY_RO) {
+ val = pte_val(pte) & ~_PAGE_WRITE;
+ val |= _PAGE_READ;
+ pte = __pte(val);
+ }
+ if (md->attribute & EFI_MEMORY_XP) {
+ val = pte_val(pte) & ~_PAGE_EXEC;
+ pte = __pte(val);
+ }
+ set_pte(ptep, pte);
+
+ return 0;
+}
+
+int __init efi_set_mapping_permissions(struct mm_struct *mm,
+ efi_memory_desc_t *md)
+{
+ BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
+ md->type != EFI_RUNTIME_SERVICES_DATA);
+
+ /*
+ * Calling apply_to_page_range() is only safe on regions that are
+ * guaranteed to be mapped down to pages. Since we are only called
+ * for regions that have been mapped using efi_create_mapping() above
+ * (and this is checked by the generic Memory Attributes table parsing
+ * routines), there is no need to check that again here.
+ */
+ return apply_to_page_range(mm, md->virt_addr,
+ md->num_pages << EFI_PAGE_SHIFT,
+ set_permissions, md);
+}
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
new file mode 100644
index 000000000000..9cb85095fd45
--- /dev/null
+++ b/arch/riscv/kernel/elf_kexec.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2021 Huawei Technologies Co, Ltd.
+ *
+ * Author: Liao Chang (liaochang1@huawei.com)
+ *
+ * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
+ * for kernel.
+ */
+
+#define pr_fmt(fmt) "kexec_image: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <asm/setup.h>
+
+static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
+ struct kexec_elf_info *elf_info, unsigned long old_pbase,
+ unsigned long new_pbase)
+{
+ int i;
+ int ret = 0;
+ size_t size;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+
+ kbuf.image = image;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ size = phdr->p_filesz;
+ if (size > phdr->p_memsz)
+ size = phdr->p_memsz;
+
+ kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
+ kbuf.bufsz = size;
+ kbuf.buf_align = phdr->p_align;
+ kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
+ kbuf.memsz = phdr->p_memsz;
+ kbuf.top_down = false;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Go through the available phsyical memory regions and find one that hold
+ * an image of the specified size.
+ */
+static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
+ struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
+ unsigned long *old_pbase, unsigned long *new_pbase)
+{
+ int i;
+ int ret;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+ unsigned long lowest_paddr = ULONG_MAX;
+ unsigned long lowest_vaddr = ULONG_MAX;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ if (lowest_paddr > phdr->p_paddr)
+ lowest_paddr = phdr->p_paddr;
+
+ if (lowest_vaddr > phdr->p_vaddr)
+ lowest_vaddr = phdr->p_vaddr;
+ }
+
+ kbuf.image = image;
+ kbuf.buf_min = lowest_paddr;
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
+ kbuf.top_down = false;
+ ret = arch_kexec_locate_mem_hole(&kbuf);
+ if (!ret) {
+ *old_pbase = lowest_paddr;
+ *new_pbase = kbuf.mem;
+ image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
+ }
+ return ret;
+}
+
+static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
+{
+ unsigned int *nr_ranges = arg;
+
+ (*nr_ranges)++;
+ return 0;
+}
+
+static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
+{
+ struct crash_mem *cmem = arg;
+
+ cmem->ranges[cmem->nr_ranges].start = res->start;
+ cmem->ranges[cmem->nr_ranges].end = res->end;
+ cmem->nr_ranges++;
+
+ return 0;
+}
+
+static int prepare_elf_headers(void **addr, unsigned long *sz)
+{
+ struct crash_mem *cmem;
+ unsigned int nr_ranges;
+ int ret;
+
+ nr_ranges = 1; /* For exclusion of crashkernel region */
+ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
+
+ cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
+ if (!cmem)
+ return -ENOMEM;
+
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+ ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
+ if (ret)
+ goto out;
+
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (!ret)
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+
+out:
+ kfree(cmem);
+ return ret;
+}
+
+static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int elfcorehdr_strlen;
+ char *cmdline_ptr;
+
+ cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+ if (!cmdline_ptr)
+ return NULL;
+
+ elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
+ image->elf_load_addr);
+
+ if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
+ pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
+ kfree(cmdline_ptr);
+ return NULL;
+ }
+
+ memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
+ /* Ensure it's nul terminated */
+ cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
+ return cmdline_ptr;
+}
+
+static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ unsigned long old_kernel_pbase = ULONG_MAX;
+ unsigned long new_kernel_pbase = 0UL;
+ unsigned long initrd_pbase = 0UL;
+ unsigned long headers_sz;
+ unsigned long kernel_start;
+ void *fdt, *headers;
+ struct elfhdr ehdr;
+ struct kexec_buf kbuf;
+ struct kexec_elf_info elf_info;
+ char *modified_cmdline = NULL;
+
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ return ERR_PTR(ret);
+
+ ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
+ &old_kernel_pbase, &new_kernel_pbase);
+ if (ret)
+ goto out;
+ kernel_start = image->start;
+ pr_notice("The entry point of kernel at 0x%lx\n", image->start);
+
+ /* Add the kernel binary to the image */
+ ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
+ old_kernel_pbase, new_kernel_pbase);
+ if (ret)
+ goto out;
+
+ kbuf.image = image;
+ kbuf.buf_min = new_kernel_pbase + kernel_len;
+ kbuf.buf_max = ULONG_MAX;
+
+ /* Add elfcorehdr */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = prepare_elf_headers(&headers, &headers_sz);
+ if (ret) {
+ pr_err("Preparing elf core header failed\n");
+ goto out;
+ }
+
+ kbuf.buffer = headers;
+ kbuf.bufsz = headers_sz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = headers_sz;
+ kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ kbuf.top_down = true;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ vfree(headers);
+ goto out;
+ }
+ image->elf_headers = headers;
+ image->elf_load_addr = kbuf.mem;
+ image->elf_headers_sz = headers_sz;
+
+ pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
+
+ /* Setup cmdline for kdump kernel case */
+ modified_cmdline = setup_kdump_cmdline(image, cmdline,
+ cmdline_len);
+ if (!modified_cmdline) {
+ pr_err("Setting up cmdline for kdump kernel failed\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ cmdline = modified_cmdline;
+ }
+
+#ifdef CONFIG_ARCH_HAS_KEXEC_PURGATORY
+ /* Add purgatory to the image */
+ kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_load_purgatory(image, &kbuf);
+ if (ret) {
+ pr_err("Error loading purgatory ret=%d\n", ret);
+ goto out;
+ }
+ ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
+ &kernel_start,
+ sizeof(kernel_start), 0);
+ if (ret)
+ pr_err("Error update purgatory ret=%d\n", ret);
+#endif /* CONFIG_ARCH_HAS_KEXEC_PURGATORY */
+
+ /* Add the initrd to the image */
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = false;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_pbase = kbuf.mem;
+ pr_notice("Loaded initrd at 0x%lx\n", initrd_pbase);
+ }
+
+ /* Add the DTB to the image */
+ fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
+ initrd_len, cmdline, 0);
+ if (!fdt) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ fdt_pack(fdt);
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.top_down = true;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ pr_err("Error add DTB kbuf ret=%d\n", ret);
+ goto out_free_fdt;
+ }
+ pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem);
+ goto out;
+
+out_free_fdt:
+ kvfree(fdt);
+out:
+ kfree(modified_cmdline);
+ kexec_free_elf_info(&elf_info);
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
+#define RISCV_IMM_BITS 12
+#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
+#define RISCV_CONST_HIGH_PART(x) \
+ (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
+#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))
+
+#define ENCODE_ITYPE_IMM(x) \
+ (RV_X(x, 0, 12) << 20)
+#define ENCODE_BTYPE_IMM(x) \
+ ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
+ (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
+#define ENCODE_UTYPE_IMM(x) \
+ (RV_X(x, 12, 20) << 12)
+#define ENCODE_JTYPE_IMM(x) \
+ ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
+ (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
+#define ENCODE_CBTYPE_IMM(x) \
+ ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
+ (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
+#define ENCODE_CJTYPE_IMM(x) \
+ ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
+ (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
+ (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
+#define ENCODE_UJTYPE_IMM(x) \
+ (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
+ (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
+#define ENCODE_UITYPE_IMM(x) \
+ (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))
+
+#define CLEAN_IMM(type, x) \
+ ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
+
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab)
+{
+ const char *strtab, *name, *shstrtab;
+ const Elf_Shdr *sechdrs;
+ Elf_Rela *relas;
+ int i, r_type;
+
+ /* String & section header string table */
+ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+ strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
+ shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
+
+ relas = (void *)pi->ehdr + relsec->sh_offset;
+
+ for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
+ const Elf_Sym *sym; /* symbol to relocate */
+ unsigned long addr; /* final location after relocation */
+ unsigned long val; /* relocated symbol value */
+ unsigned long sec_base; /* relocated symbol value */
+ void *loc; /* tmp location to modify */
+
+ sym = (void *)pi->ehdr + symtab->sh_offset;
+ sym += ELF64_R_SYM(relas[i].r_info);
+
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+ loc = pi->purgatory_buf;
+ loc += section->sh_offset;
+ loc += relas[i].r_offset;
+
+ if (sym->st_shndx == SHN_ABS)
+ sec_base = 0;
+ else if (sym->st_shndx >= pi->ehdr->e_shnum) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
+ return -ENOEXEC;
+ } else
+ sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
+
+ val = sym->st_value;
+ val += sec_base;
+ val += relas[i].r_addend;
+
+ addr = section->sh_addr + relas[i].r_offset;
+
+ r_type = ELF64_R_TYPE(relas[i].r_info);
+
+ switch (r_type) {
+ case R_RISCV_BRANCH:
+ *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
+ ENCODE_BTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_JAL:
+ *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
+ ENCODE_JTYPE_IMM(val - addr);
+ break;
+ /*
+ * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
+ * sym is expected to be next to R_RISCV_PCREL_HI20
+ * in purgatory relsec. Handle it like R_RISCV_CALL
+ * sym, instead of searching the whole relsec.
+ */
+ case R_RISCV_PCREL_HI20:
+ case R_RISCV_CALL:
+ *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
+ ENCODE_UJTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_RVC_BRANCH:
+ *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
+ ENCODE_CBTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_RVC_JUMP:
+ *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
+ ENCODE_CJTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_ADD32:
+ *(u32 *)loc += val;
+ break;
+ case R_RISCV_SUB32:
+ *(u32 *)loc -= val;
+ break;
+ /* It has been applied by R_RISCV_PCREL_HI20 sym */
+ case R_RISCV_PCREL_LO12_I:
+ case R_RISCV_ALIGN:
+ case R_RISCV_RELAX:
+ break;
+ default:
+ pr_err("Unknown rela relocation: %d\n", r_type);
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+}
+
+const struct kexec_file_ops elf_kexec_ops = {
+ .probe = kexec_elf_probe,
+ .load = elf_kexec_load,
+};
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index cae7e6d4c7ef..2e5b88ca11ce 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -12,6 +12,7 @@
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/errata_list.h>
#if !IS_ENABLED(CONFIG_PREEMPTION)
.set resume_kernel, restore_all
@@ -29,6 +30,15 @@ ENTRY(handle_exception)
_restore_kernel_tpsp:
csrr tp, CSR_SCRATCH
REG_S sp, TASK_TI_KERNEL_SP(tp)
+
+#ifdef CONFIG_VMAP_STACK
+ addi sp, sp, -(PT_SIZE_ON_STACK)
+ srli sp, sp, THREAD_SHIFT
+ andi sp, sp, 0x1
+ bnez sp, handle_kernel_stack_overflow
+ REG_L sp, TASK_TI_KERNEL_SP(tp)
+#endif
+
_save_context:
REG_S sp, TASK_TI_USER_SP(tp)
REG_L sp, TASK_TI_KERNEL_SP(tp)
@@ -97,17 +107,30 @@ _save_context:
la gp, __global_pointer$
.option pop
- la ra, ret_from_exception
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call __trace_hardirqs_off
+#endif
+
+#ifdef CONFIG_CONTEXT_TRACKING
+ /* If previous state is in user mode, call context_tracking_user_exit. */
+ li a0, SR_PP
+ and a0, s1, a0
+ bnez a0, skip_context_tracking
+ call context_tracking_user_exit
+skip_context_tracking:
+#endif
+
/*
* MSB of cause differentiates between
* interrupts and exceptions
*/
bge s4, zero, 1f
+ la ra, ret_from_exception
+
/* Handle interrupts */
move a0, sp /* pt_regs */
- la a1, handle_arch_irq
- REG_L a1, (a1)
+ la a1, generic_handle_arch_irq
jr a1
1:
/*
@@ -116,9 +139,16 @@ _save_context:
*/
andi t0, s1, SR_PIE
beqz t0, 1f
+ /* kprobes, entered via ebreak, must have interrupts disabled. */
+ li t0, EXC_BREAKPOINT
+ beq s4, t0, 1f
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call __trace_hardirqs_on
+#endif
csrs CSR_STATUS, SR_IE
1:
+ la ra, ret_from_exception
/* Handle syscalls */
li t0, EXC_SYSCALL
beq s4, t0, handle_syscall
@@ -137,6 +167,26 @@ _save_context:
tail do_trap_unknown
handle_syscall:
+#ifdef CONFIG_RISCV_M_MODE
+ /*
+ * When running is M-Mode (no MMU config), MPIE does not get set.
+ * As a result, we need to force enable interrupts here because
+ * handle_exception did not do set SR_IE as it always sees SR_PIE
+ * being cleared.
+ */
+ csrs CSR_STATUS, SR_IE
+#endif
+#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING)
+ /* Recover a0 - a7 for system calls */
+ REG_L a0, PT_A0(sp)
+ REG_L a1, PT_A1(sp)
+ REG_L a2, PT_A2(sp)
+ REG_L a3, PT_A3(sp)
+ REG_L a4, PT_A4(sp)
+ REG_L a5, PT_A5(sp)
+ REG_L a6, PT_A6(sp)
+ REG_L a7, PT_A7(sp)
+#endif
/* save the initial A0 value (needed in signal handlers) */
REG_S a0, PT_ORIG_A0(sp)
/*
@@ -157,20 +207,27 @@ check_syscall_nr:
* Syscall number held in a7.
* If syscall number is above allowed value, redirect to ni_syscall.
*/
- bge a7, t0, 1f
- /*
- * Check if syscall is rejected by tracer, i.e., a7 == -1.
- * If yes, we pretend it was executed.
- */
- li t1, -1
- beq a7, t1, ret_from_syscall_rejected
- blt a7, t1, 1f
+ bgeu a7, t0, 3f
+#ifdef CONFIG_COMPAT
+ REG_L s0, PT_STATUS(sp)
+ srli s0, s0, SR_UXL_SHIFT
+ andi s0, s0, (SR_UXL >> SR_UXL_SHIFT)
+ li t0, (SR_UXL_32 >> SR_UXL_SHIFT)
+ sub t0, s0, t0
+ bnez t0, 1f
+
+ /* Call compat_syscall */
+ la s0, compat_sys_call_table
+ j 2f
+1:
+#endif
/* Call syscall */
la s0, sys_call_table
+2:
slli t0, a7, RISCV_LGPTR
add s0, s0, t0
REG_L s0, 0(s0)
-1:
+3:
jalr s0
ret_from_syscall:
@@ -182,6 +239,10 @@ ret_from_syscall:
* (If it was configured with SECCOMP_RET_ERRNO/TRACE)
*/
ret_from_syscall_rejected:
+#ifdef CONFIG_DEBUG_RSEQ
+ move a0, sp
+ call rseq_syscall
+#endif
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_WORK
@@ -190,6 +251,9 @@ ret_from_syscall_rejected:
ret_from_exception:
REG_L s0, PT_STATUS(sp)
csrc CSR_STATUS, SR_IE
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call __trace_hardirqs_off
+#endif
#ifdef CONFIG_RISCV_M_MODE
/* the MPP value is too large to be used as an immediate arg for addi */
li t0, SR_MPP
@@ -205,6 +269,10 @@ resume_userspace:
andi s1, s0, _TIF_WORK_MASK
bnez s1, work_pending
+#ifdef CONFIG_CONTEXT_TRACKING
+ call context_tracking_user_enter
+#endif
+
/* Save unwound kernel stack pointer in thread_info */
addi s0, sp, PT_SIZE_ON_STACK
REG_S s0, TASK_TI_KERNEL_SP(tp)
@@ -216,6 +284,16 @@ resume_userspace:
csrw CSR_SCRATCH, tp
restore_all:
+#ifdef CONFIG_TRACE_IRQFLAGS
+ REG_L s1, PT_STATUS(sp)
+ andi t0, s1, SR_PIE
+ beqz t0, 1f
+ call __trace_hardirqs_on
+ j 2f
+1:
+ call __trace_hardirqs_off
+2:
+#endif
REG_L a0, PT_STATUS(sp)
/*
* The current load reservation is effectively part of the processor's
@@ -324,6 +402,105 @@ handle_syscall_trace_exit:
call do_syscall_trace_exit
j ret_from_exception
+#ifdef CONFIG_VMAP_STACK
+handle_kernel_stack_overflow:
+ la sp, shadow_stack
+ addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
+
+ //save caller register to shadow stack
+ addi sp, sp, -(PT_SIZE_ON_STACK)
+ REG_S x1, PT_RA(sp)
+ REG_S x5, PT_T0(sp)
+ REG_S x6, PT_T1(sp)
+ REG_S x7, PT_T2(sp)
+ REG_S x10, PT_A0(sp)
+ REG_S x11, PT_A1(sp)
+ REG_S x12, PT_A2(sp)
+ REG_S x13, PT_A3(sp)
+ REG_S x14, PT_A4(sp)
+ REG_S x15, PT_A5(sp)
+ REG_S x16, PT_A6(sp)
+ REG_S x17, PT_A7(sp)
+ REG_S x28, PT_T3(sp)
+ REG_S x29, PT_T4(sp)
+ REG_S x30, PT_T5(sp)
+ REG_S x31, PT_T6(sp)
+
+ la ra, restore_caller_reg
+ tail get_overflow_stack
+
+restore_caller_reg:
+ //save per-cpu overflow stack
+ REG_S a0, -8(sp)
+ //restore caller register from shadow_stack
+ REG_L x1, PT_RA(sp)
+ REG_L x5, PT_T0(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x10, PT_A0(sp)
+ REG_L x11, PT_A1(sp)
+ REG_L x12, PT_A2(sp)
+ REG_L x13, PT_A3(sp)
+ REG_L x14, PT_A4(sp)
+ REG_L x15, PT_A5(sp)
+ REG_L x16, PT_A6(sp)
+ REG_L x17, PT_A7(sp)
+ REG_L x28, PT_T3(sp)
+ REG_L x29, PT_T4(sp)
+ REG_L x30, PT_T5(sp)
+ REG_L x31, PT_T6(sp)
+
+ //load per-cpu overflow stack
+ REG_L sp, -8(sp)
+ addi sp, sp, -(PT_SIZE_ON_STACK)
+
+ //save context to overflow stack
+ REG_S x1, PT_RA(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x5, PT_T0(sp)
+ REG_S x6, PT_T1(sp)
+ REG_S x7, PT_T2(sp)
+ REG_S x8, PT_S0(sp)
+ REG_S x9, PT_S1(sp)
+ REG_S x10, PT_A0(sp)
+ REG_S x11, PT_A1(sp)
+ REG_S x12, PT_A2(sp)
+ REG_S x13, PT_A3(sp)
+ REG_S x14, PT_A4(sp)
+ REG_S x15, PT_A5(sp)
+ REG_S x16, PT_A6(sp)
+ REG_S x17, PT_A7(sp)
+ REG_S x18, PT_S2(sp)
+ REG_S x19, PT_S3(sp)
+ REG_S x20, PT_S4(sp)
+ REG_S x21, PT_S5(sp)
+ REG_S x22, PT_S6(sp)
+ REG_S x23, PT_S7(sp)
+ REG_S x24, PT_S8(sp)
+ REG_S x25, PT_S9(sp)
+ REG_S x26, PT_S10(sp)
+ REG_S x27, PT_S11(sp)
+ REG_S x28, PT_T3(sp)
+ REG_S x29, PT_T4(sp)
+ REG_S x30, PT_T5(sp)
+ REG_S x31, PT_T6(sp)
+
+ REG_L s0, TASK_TI_KERNEL_SP(tp)
+ csrr s1, CSR_STATUS
+ csrr s2, CSR_EPC
+ csrr s3, CSR_TVAL
+ csrr s4, CSR_CAUSE
+ csrr s5, CSR_SCRATCH
+ REG_S s0, PT_SP(sp)
+ REG_S s1, PT_STATUS(sp)
+ REG_S s2, PT_EPC(sp)
+ REG_S s3, PT_BADADDR(sp)
+ REG_S s4, PT_CAUSE(sp)
+ REG_S s5, PT_TP(sp)
+ move a0, sp
+ tail handle_bad_stack
+#endif
+
END(handle_exception)
ENTRY(ret_from_fork)
@@ -384,17 +561,8 @@ ENTRY(__switch_to)
REG_L s9, TASK_THREAD_S9_RA(a4)
REG_L s10, TASK_THREAD_S10_RA(a4)
REG_L s11, TASK_THREAD_S11_RA(a4)
- /* Swap the CPU entry around. */
- lw a3, TASK_TI_CPU(a0)
- lw a4, TASK_TI_CPU(a1)
- sw a3, TASK_TI_CPU(a1)
- sw a4, TASK_TI_CPU(a0)
-#if TASK_TI != 0
-#error "TASK_TI != 0: tp will contain a 'struct thread_info', not a 'struct task_struct' so get_current() won't work."
- addi tp, a1, TASK_TI
-#else
+ /* The offset of thread_info in task_struct is zero. */
move tp, a1
-#endif
ret
ENDPROC(__switch_to)
@@ -403,10 +571,11 @@ ENDPROC(__switch_to)
#endif
.section ".rodata"
+ .align LGREG
/* Exception vector table */
ENTRY(excp_vect_table)
RISCV_PTR do_trap_insn_misaligned
- RISCV_PTR do_trap_insn_fault
+ ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault)
RISCV_PTR do_trap_insn_illegal
RISCV_PTR do_trap_break
RISCV_PTR do_trap_load_misaligned
@@ -417,7 +586,8 @@ ENTRY(excp_vect_table)
RISCV_PTR do_trap_ecall_s
RISCV_PTR do_trap_unknown
RISCV_PTR do_trap_ecall_m
- RISCV_PTR do_page_fault /* instruction page fault */
+ /* instruciton page fault */
+ ALT_PAGE_FAULT(RISCV_PTR do_page_fault)
RISCV_PTR do_page_fault /* load page fault */
RISCV_PTR do_trap_unknown
RISCV_PTR do_page_fault /* store page fault */
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 08396614d6f4..2086f6585773 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2013 Linaro Limited
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
@@ -12,16 +12,14 @@
#include <asm/patch.h>
#ifdef CONFIG_DYNAMIC_FTRACE
-int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
+void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
{
mutex_lock(&text_mutex);
- return 0;
}
-int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
+void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
{
mutex_unlock(&text_mutex);
- return 0;
}
static int ftrace_check_current_call(unsigned long hook_pos,
@@ -38,7 +36,8 @@ static int ftrace_check_current_call(unsigned long hook_pos,
* Read the text we want to modify;
* return must be -EFAULT on read error
*/
- if (probe_kernel_read(replaced, (void *)hook_pos, MCOUNT_INSN_SIZE))
+ if (copy_from_kernel_nofault(replaced, (void *)hook_pos,
+ MCOUNT_INSN_SIZE))
return -EFAULT;
/*
@@ -71,29 +70,75 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
return 0;
}
+/*
+ * Put 5 instructions with 16 bytes at the front of function within
+ * patchable function entry nops' area.
+ *
+ * 0: REG_S ra, -SZREG(sp)
+ * 1: auipc ra, 0x?
+ * 2: jalr -?(ra)
+ * 3: REG_L ra, -SZREG(sp)
+ *
+ * So the opcodes is:
+ * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
+ * 1: 0x???????? -> auipc
+ * 2: 0x???????? -> jalr
+ * 3: 0xff813083 (ld)/0xffc12083 (lw)
+ */
+#if __riscv_xlen == 64
+#define INSN0 0xfe113c23
+#define INSN3 0xff813083
+#elif __riscv_xlen == 32
+#define INSN0 0xfe112e23
+#define INSN3 0xffc12083
+#endif
+
+#define FUNC_ENTRY_SIZE 16
+#define FUNC_ENTRY_JMP 4
+
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- int ret = ftrace_check_current_call(rec->ip, NULL);
+ unsigned int call[4] = {INSN0, 0, 0, INSN3};
+ unsigned long target = addr;
+ unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
- if (ret)
- return ret;
+ call[1] = to_auipc_insn((unsigned int)(target - caller));
+ call[2] = to_jalr_insn((unsigned int)(target - caller));
+
+ if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
+ return -EPERM;
- return __ftrace_modify_call(rec->ip, addr, true);
+ return 0;
}
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- unsigned int call[2];
- int ret;
+ unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
- make_call(rec->ip, addr, call);
- ret = ftrace_check_current_call(rec->ip, call);
+ if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
+ return -EPERM;
- if (ret)
- return ret;
+ return 0;
+}
+
+
+/*
+ * This is called early on, and isn't wrapped by
+ * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold
+ * text_mutex, which triggers a lockdep failure. SMP isn't running so we could
+ * just directly poke the text, but it's simpler to just take the lock
+ * ourselves.
+ */
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+ int out;
- return __ftrace_modify_call(rec->ip, addr, false);
+ ftrace_arch_code_modify_prepare();
+ out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
+ ftrace_arch_code_modify_post_process();
+
+ return out;
}
int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -107,11 +152,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
-
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
@@ -119,15 +159,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
unsigned int call[2];
+ unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
int ret;
- make_call(rec->ip, old_addr, call);
- ret = ftrace_check_current_call(rec->ip, call);
+ make_call(caller, old_addr, call);
+ ret = ftrace_check_current_call(caller, call);
if (ret)
return ret;
- return __ftrace_modify_call(rec->ip, addr, true);
+ return __ftrace_modify_call(caller, addr, true);
}
#endif
@@ -156,53 +197,30 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);
+extern void ftrace_graph_regs_call(void);
int ftrace_enable_ftrace_graph_caller(void)
{
- unsigned int call[2];
- static int init_graph = 1;
int ret;
- make_call(&ftrace_graph_call, &ftrace_stub, call);
-
- /*
- * When enabling graph tracer for the first time, ftrace_graph_call
- * should contains a call to ftrace_stub. Once it has been disabled,
- * the 8-bytes at the position becomes NOPs.
- */
- if (init_graph) {
- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
- call);
- init_graph = 0;
- } else {
- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
- NULL);
- }
-
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+ (unsigned long)&prepare_ftrace_return, true);
if (ret)
return ret;
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
(unsigned long)&prepare_ftrace_return, true);
}
int ftrace_disable_ftrace_graph_caller(void)
{
- unsigned int call[2];
int ret;
- make_call(&ftrace_graph_call, &prepare_ftrace_return, call);
-
- /*
- * This is to make sure that ftrace_enable_ftrace_graph_caller
- * did the right thing.
- */
- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
- call);
-
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+ (unsigned long)&prepare_ftrace_return, false);
if (ret)
return ret;
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
(unsigned long)&prepare_ftrace_return, false);
}
#endif /* CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 7ed1b22950fd..b865046e4dbb 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -3,16 +3,19 @@
* Copyright (C) 2012 Regents of the University of California
*/
-#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/asm.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/csr.h>
+#include <asm/cpu_ops_sbi.h>
#include <asm/hwcap.h>
#include <asm/image.h>
+#include <asm/xip_fixup.h>
+#include "efi-header.S"
__HEAD
ENTRY(_start)
@@ -22,11 +25,23 @@ ENTRY(_start)
* Do not modify it without modifying the structure and all bootloaders
* that expects this header format!!
*/
+#ifdef CONFIG_EFI
+ /*
+ * This instruction decodes to "MZ" ASCII required by UEFI.
+ */
+ c.li s4,-13
+ j _start_kernel
+#else
/* jump to start kernel */
j _start_kernel
/* reserved */
.word 0
+#endif
.balign 8
+#ifdef CONFIG_RISCV_M_MODE
+ /* Image load offset (0MB) from start of RAM for M-mode */
+ .dword 0
+#else
#if __riscv_xlen == 64
/* Image load offset(2MB) from start of RAM */
.dword 0x200000
@@ -34,6 +49,7 @@ ENTRY(_start)
/* Image load offset(4MB) from start of RAM */
.dword 0x400000
#endif
+#endif
/* Effective size of kernel image */
.dword _end - _start
.dword __HEAD_FLAGS
@@ -43,13 +59,23 @@ ENTRY(_start)
.ascii RISCV_IMAGE_MAGIC
.balign 4
.ascii RISCV_IMAGE_MAGIC2
+#ifdef CONFIG_EFI
+ .word pe_head_start - _start
+pe_head_start:
+
+ __EFI_PE_HEADER
+#else
.word 0
+#endif
.align 2
#ifdef CONFIG_MMU
-relocate:
+ .global relocate_enable_mmu
+relocate_enable_mmu:
/* Relocate return address */
- li a1, PAGE_OFFSET
+ la a1, kernel_map
+ XIP_FIXUP_OFFSET a1
+ REG_L a1, KERNEL_MAP_VIRT_ADDR(a1)
la a2, _start
sub a1, a1, a2
add ra, ra, a1
@@ -61,7 +87,8 @@ relocate:
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
- li a1, SATP_MODE
+ la a1, satp_mode
+ REG_L a1, 0(a1)
or a2, a2, a1
/*
@@ -71,6 +98,7 @@ relocate:
* to ensure the new translations are in use.
*/
la a0, trampoline_pg_dir
+ XIP_FIXUP_OFFSET a0
srl a0, a0, PAGE_SHIFT
or a0, a0, a1
sfence.vma
@@ -90,7 +118,7 @@ relocate:
/*
* Switch to kernel page tables. A full fence is necessary in order to
* avoid using the trampoline translations, which are only correct for
- * the first superpage. Fetching the fence is guarnteed to work
+ * the first superpage. Fetching the fence is guaranteed to work
* because that first superpage is translated the same way.
*/
csrw CSR_SATP, a2
@@ -122,25 +150,42 @@ secondary_start_sbi:
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
- slli a3, a0, LGREG
- la a4, __cpu_up_stack_pointer
- la a5, __cpu_up_task_pointer
- add a4, a3, a4
- add a5, a3, a5
- REG_L sp, (a4)
- REG_L tp, (a5)
+ /* a0 contains the hartid & a1 contains boot data */
+ li a2, SBI_HART_BOOT_TASK_PTR_OFFSET
+ XIP_FIXUP_OFFSET a2
+ add a2, a2, a1
+ REG_L tp, (a2)
+ li a3, SBI_HART_BOOT_STACK_PTR_OFFSET
+ XIP_FIXUP_OFFSET a3
+ add a3, a3, a1
+ REG_L sp, (a3)
- .global secondary_start_common
-secondary_start_common:
+.Lsecondary_start_common:
#ifdef CONFIG_MMU
/* Enable virtual memory and relocate to virtual address */
la a0, swapper_pg_dir
- call relocate
+ XIP_FIXUP_OFFSET a0
+ call relocate_enable_mmu
#endif
+ call setup_trap_vector
tail smp_callin
#endif /* CONFIG_SMP */
+.align 2
+setup_trap_vector:
+ /* Set trap vector to exception handler */
+ la a0, handle_exception
+ csrw CSR_TVEC, a0
+
+ /*
+ * Set sup0 scratch register to 0, indicating to exception vector that
+ * we are presently executing in kernel.
+ */
+ csrw CSR_SCRATCH, zero
+ ret
+
+.align 2
.Lsecondary_park:
/* We lack SMP support or have too many harts, so park this hart */
wfi
@@ -148,7 +193,6 @@ secondary_start_common:
END(_start)
- __INIT
ENTRY(_start_kernel)
/* Mask all interrupts */
csrw CSR_IE, zero
@@ -196,19 +240,45 @@ pmp_done:
li t0, SR_FS
csrc CSR_STATUS, t0
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
li t0, CONFIG_NR_CPUS
blt a0, t0, .Lgood_cores
tail .Lsecondary_park
.Lgood_cores:
-#endif
+ /* The lottery system is only required for spinwait booting method */
+#ifndef CONFIG_XIP_KERNEL
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
li a2, 1
amoadd.w a3, a2, (a3)
bnez a3, .Lsecondary_start
+#else
+ /* hart_lottery in flash contains a magic number */
+ la a3, hart_lottery
+ mv a2, a3
+ XIP_FIXUP_OFFSET a2
+ XIP_FIXUP_FLASH_OFFSET a3
+ lw t1, (a3)
+ amoswap.w t0, t1, (a2)
+ /* first time here if hart_lottery in RAM is not set */
+ beq t0, t1, .Lsecondary_start
+
+#endif /* CONFIG_XIP */
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
+
+#ifdef CONFIG_XIP_KERNEL
+ la sp, _end + THREAD_SIZE
+ XIP_FIXUP_OFFSET sp
+ mv s0, a0
+ call __copy_data
+
+ /* Restore a0 copy */
+ mv a0, s0
+#endif
+
+#ifndef CONFIG_XIP_KERNEL
/* Clear BSS for flat non-ELF images */
la a3, __bss_start
la a4, __bss_stop
@@ -218,25 +288,35 @@ clear_bss:
add a3, a3, RISCV_SZPTR
blt a3, a4, clear_bss
clear_bss_done:
-
+#endif
/* Save hart ID and DTB physical address */
mv s0, a0
mv s1, a1
+
la a2, boot_cpu_hartid
+ XIP_FIXUP_OFFSET a2
REG_S a0, (a2)
/* Initialize page tables and relocate to virtual addresses */
+ la tp, init_task
la sp, init_thread_union + THREAD_SIZE
+ XIP_FIXUP_OFFSET sp
+#ifdef CONFIG_BUILTIN_DTB
+ la a0, __dtb_start
+ XIP_FIXUP_OFFSET a0
+#else
mv a0, s1
+#endif /* CONFIG_BUILTIN_DTB */
call setup_vm
#ifdef CONFIG_MMU
la a0, early_pg_dir
- call relocate
+ XIP_FIXUP_OFFSET a0
+ call relocate_enable_mmu
#endif /* CONFIG_MMU */
+ call setup_trap_vector
/* Restore C environment */
la tp, init_task
- sw zero, TASK_TI_CPU(tp)
la sp, init_thread_union + THREAD_SIZE
#ifdef CONFIG_KASAN
@@ -244,18 +324,19 @@ clear_bss_done:
#endif
/* Start the kernel */
call soc_early_init
- call parse_dtb
tail start_kernel
+#if CONFIG_RISCV_BOOT_SPINWAIT
.Lsecondary_start:
-#ifdef CONFIG_SMP
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
slli a3, a0, LGREG
- la a1, __cpu_up_stack_pointer
- la a2, __cpu_up_task_pointer
+ la a1, __cpu_spinwait_stack_pointer
+ XIP_FIXUP_OFFSET a1
+ la a2, __cpu_spinwait_task_pointer
+ XIP_FIXUP_OFFSET a2
add a1, a3, a1
add a2, a3, a2
@@ -271,8 +352,8 @@ clear_bss_done:
beqz tp, .Lwait_for_cpu_up
fence
- tail secondary_start_common
-#endif
+ tail .Lsecondary_start_common
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
END(_start_kernel)
@@ -354,7 +435,3 @@ ENTRY(reset_regs)
ret
END(reset_regs)
#endif /* CONFIG_RISCV_M_MODE */
-
-__PAGE_ALIGNED_BSS
- /* Empty zero page */
- .balign PAGE_SIZE
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
index 105fb0496b24..726731ada534 100644
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -12,10 +12,13 @@ extern atomic_t hart_lottery;
asmlinkage void do_page_fault(struct pt_regs *regs);
asmlinkage void __init setup_vm(uintptr_t dtb_pa);
+#ifdef CONFIG_XIP_KERNEL
+asmlinkage void __init __copy_data(void);
+#endif
-extern void *__cpu_up_stack_pointer[];
-extern void *__cpu_up_task_pointer[];
-
-void __init parse_dtb(void);
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
+extern void *__cpu_spinwait_stack_pointer[];
+extern void *__cpu_spinwait_task_pointer[];
+#endif
#endif /* __ASM_HEAD_H */
diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h
new file mode 100644
index 000000000000..71a76a623257
--- /dev/null
+++ b/arch/riscv/kernel/image-vars.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ * Linker script variables to be set after section resolution, as
+ * ld.lld does not like variables assigned before SECTIONS is processed.
+ * Based on arch/arm64/kernel/image-vars.h
+ */
+#ifndef __RISCV_KERNEL_IMAGE_VARS_H
+#define __RISCV_KERNEL_IMAGE_VARS_H
+
+#ifndef LINKER_SCRIPT
+#error This file should only be included in vmlinux.lds.S
+#endif
+
+#ifdef CONFIG_EFI
+
+/*
+ * The EFI stub has its own symbol namespace prefixed by __efistub_, to
+ * isolate it from the kernel proper. The following symbols are legally
+ * accessed by the stub, so provide some aliases to make them accessible.
+ * Only include data symbols here, or text symbols of functions that are
+ * guaranteed to be safe when executed at another offset than they were
+ * linked at. The routines below are all implemented in assembler in a
+ * position independent manner
+ */
+__efistub_memcmp = memcmp;
+__efistub_memchr = memchr;
+__efistub_memcpy = memcpy;
+__efistub_memmove = memmove;
+__efistub_memset = memset;
+__efistub_strlen = strlen;
+__efistub_strnlen = strnlen;
+__efistub_strcmp = strcmp;
+__efistub_strncmp = strncmp;
+__efistub_strrchr = strrchr;
+
+#ifdef CONFIG_KASAN
+__efistub___memcpy = memcpy;
+__efistub___memmove = memmove;
+__efistub___memset = memset;
+#endif
+
+__efistub__start = _start;
+__efistub__start_kernel = _start_kernel;
+__efistub__end = _end;
+__efistub__edata = _edata;
+__efistub_screen_info = screen_info;
+
+#endif
+
+#endif /* __RISCV_KERNEL_IMAGE_VARS_H */
diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c
new file mode 100644
index 000000000000..20e09056d141
--- /dev/null
+++ b/arch/riscv/kernel/jump_label.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Emil Renner Berthing
+ *
+ * Based on arch/arm64/kernel/jump_label.c
+ */
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/memory.h>
+#include <linux/mutex.h>
+#include <asm/bug.h>
+#include <asm/patch.h>
+
+#define RISCV_INSN_NOP 0x00000013U
+#define RISCV_INSN_JAL 0x0000006fU
+
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ void *addr = (void *)jump_entry_code(entry);
+ u32 insn;
+
+ if (type == JUMP_LABEL_JMP) {
+ long offset = jump_entry_target(entry) - jump_entry_code(entry);
+
+ if (WARN_ON(offset & 1 || offset < -524288 || offset >= 524288))
+ return;
+
+ insn = RISCV_INSN_JAL |
+ (((u32)offset & GENMASK(19, 12)) << (12 - 12)) |
+ (((u32)offset & GENMASK(11, 11)) << (20 - 11)) |
+ (((u32)offset & GENMASK(10, 1)) << (21 - 1)) |
+ (((u32)offset & GENMASK(20, 20)) << (31 - 20));
+ } else {
+ insn = RISCV_INSN_NOP;
+ }
+
+ mutex_lock(&text_mutex);
+ patch_text_nosync(addr, &insn, sizeof(insn));
+ mutex_unlock(&text_mutex);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ /*
+ * We use the same instructions in the arch_static_branch and
+ * arch_static_branch_jump inline functions, so there's no
+ * need to patch them up here.
+ * The core will call arch_jump_label_transform when those
+ * instructions need to be replaced.
+ */
+}
diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S
new file mode 100644
index 000000000000..059c5e216ae7
--- /dev/null
+++ b/arch/riscv/kernel/kexec_relocate.S
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
+ */
+
+#include <asm/asm.h> /* For RISCV_* and REG_* macros */
+#include <asm/csr.h> /* For CSR_* macros */
+#include <asm/page.h> /* For PAGE_SIZE */
+#include <linux/linkage.h> /* For SYM_* macros */
+
+.section ".rodata"
+SYM_CODE_START(riscv_kexec_relocate)
+
+ /*
+ * s0: Pointer to the current entry
+ * s1: (const) Phys address to jump to after relocation
+ * s2: (const) Phys address of the FDT image
+ * s3: (const) The hartid of the current hart
+ * s4: Pointer to the destination address for the relocation
+ * s5: (const) Number of words per page
+ * s6: (const) 1, used for subtraction
+ * s7: (const) kernel_map.va_pa_offset, used when switching MMU off
+ * s8: (const) Physical address of the main loop
+ * s9: (debug) indirection page counter
+ * s10: (debug) entry counter
+ * s11: (debug) copied words counter
+ */
+ mv s0, a0
+ mv s1, a1
+ mv s2, a2
+ mv s3, a3
+ mv s4, zero
+ li s5, (PAGE_SIZE / RISCV_SZPTR)
+ li s6, 1
+ mv s7, a4
+ mv s8, zero
+ mv s9, zero
+ mv s10, zero
+ mv s11, zero
+
+ /* Disable / cleanup interrupts */
+ csrw CSR_SIE, zero
+ csrw CSR_SIP, zero
+
+ /*
+ * When we switch SATP.MODE to "Bare" we'll only
+ * play with physical addresses. However the first time
+ * we try to jump somewhere, the offset on the jump
+ * will be relative to pc which will still be on VA. To
+ * deal with this we set stvec to the physical address at
+ * the start of the loop below so that we jump there in
+ * any case.
+ */
+ la s8, 1f
+ sub s8, s8, s7
+ csrw CSR_STVEC, s8
+
+ /* Process entries in a loop */
+.align 2
+1:
+ addi s10, s10, 1
+ REG_L t0, 0(s0) /* t0 = *image->entry */
+ addi s0, s0, RISCV_SZPTR /* image->entry++ */
+
+ /* IND_DESTINATION entry ? -> save destination address */
+ andi t1, t0, 0x1
+ beqz t1, 2f
+ andi s4, t0, ~0x1
+ j 1b
+
+2:
+ /* IND_INDIRECTION entry ? -> update next entry ptr (PA) */
+ andi t1, t0, 0x2
+ beqz t1, 2f
+ andi s0, t0, ~0x2
+ addi s9, s9, 1
+ csrw CSR_SATP, zero
+ jalr zero, s8, 0
+
+2:
+ /* IND_DONE entry ? -> jump to done label */
+ andi t1, t0, 0x4
+ beqz t1, 2f
+ j 4f
+
+2:
+ /*
+ * IND_SOURCE entry ? -> copy page word by word to the
+ * destination address we got from IND_DESTINATION
+ */
+ andi t1, t0, 0x8
+ beqz t1, 1b /* Unknown entry type, ignore it */
+ andi t0, t0, ~0x8
+ mv t3, s5 /* i = num words per page */
+3: /* copy loop */
+ REG_L t1, (t0) /* t1 = *src_ptr */
+ REG_S t1, (s4) /* *dst_ptr = *src_ptr */
+ addi t0, t0, RISCV_SZPTR /* stc_ptr++ */
+ addi s4, s4, RISCV_SZPTR /* dst_ptr++ */
+ sub t3, t3, s6 /* i-- */
+ addi s11, s11, 1 /* c++ */
+ beqz t3, 1b /* copy done ? */
+ j 3b
+
+4:
+ /* Pass the arguments to the next kernel / Cleanup*/
+ mv a0, s3
+ mv a1, s2
+ mv a2, s1
+
+ /* Cleanup */
+ mv a3, zero
+ mv a4, zero
+ mv a5, zero
+ mv a6, zero
+ mv a7, zero
+
+ mv s0, zero
+ mv s1, zero
+ mv s2, zero
+ mv s3, zero
+ mv s4, zero
+ mv s5, zero
+ mv s6, zero
+ mv s7, zero
+ mv s8, zero
+ mv s9, zero
+ mv s10, zero
+ mv s11, zero
+
+ mv t0, zero
+ mv t1, zero
+ mv t2, zero
+ mv t3, zero
+ mv t4, zero
+ mv t5, zero
+ mv t6, zero
+ csrw CSR_SEPC, zero
+ csrw CSR_SCAUSE, zero
+ csrw CSR_SSCRATCH, zero
+
+ /*
+ * Make sure the relocated code is visible
+ * and jump to the new kernel
+ */
+ fence.i
+
+ jalr zero, a2, 0
+
+SYM_CODE_END(riscv_kexec_relocate)
+riscv_kexec_relocate_end:
+
+
+/* Used for jumping to crashkernel */
+.section ".text"
+SYM_CODE_START(riscv_kexec_norelocate)
+ /*
+ * s0: (const) Phys address to jump to
+ * s1: (const) Phys address of the FDT image
+ * s2: (const) The hartid of the current hart
+ */
+ mv s0, a1
+ mv s1, a2
+ mv s2, a3
+
+ /* Disable / cleanup interrupts */
+ csrw CSR_SIE, zero
+ csrw CSR_SIP, zero
+
+ /* Pass the arguments to the next kernel / Cleanup*/
+ mv a0, s2
+ mv a1, s1
+ mv a2, s0
+
+ /* Cleanup */
+ mv a3, zero
+ mv a4, zero
+ mv a5, zero
+ mv a6, zero
+ mv a7, zero
+
+ mv s0, zero
+ mv s1, zero
+ mv s2, zero
+ mv s3, zero
+ mv s4, zero
+ mv s5, zero
+ mv s6, zero
+ mv s7, zero
+ mv s8, zero
+ mv s9, zero
+ mv s10, zero
+ mv s11, zero
+
+ mv t0, zero
+ mv t1, zero
+ mv t2, zero
+ mv t3, zero
+ mv t4, zero
+ mv t5, zero
+ mv t6, zero
+ csrw CSR_SEPC, zero
+ csrw CSR_SCAUSE, zero
+ csrw CSR_SSCRATCH, zero
+
+ /*
+ * Switch to physical addressing
+ * This will also trigger a jump to CSR_STVEC
+ * which in this case is the address of the new
+ * kernel.
+ */
+ csrw CSR_STVEC, a2
+ csrw CSR_SATP, zero
+
+SYM_CODE_END(riscv_kexec_norelocate)
+
+.section ".rodata"
+SYM_DATA(riscv_kexec_relocate_size,
+ .long riscv_kexec_relocate_end - riscv_kexec_relocate)
+
diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c
index f16ade84a11f..963ed7edcff2 100644
--- a/arch/riscv/kernel/kgdb.c
+++ b/arch/riscv/kernel/kgdb.c
@@ -44,25 +44,25 @@ DECLARE_INSN(c_beqz, MATCH_C_BEQZ, MASK_C_BEQZ)
DECLARE_INSN(c_bnez, MATCH_C_BNEZ, MASK_C_BNEZ)
DECLARE_INSN(sret, MATCH_SRET, MASK_SRET)
-int decode_register_index(unsigned long opcode, int offset)
+static int decode_register_index(unsigned long opcode, int offset)
{
return (opcode >> offset) & 0x1F;
}
-int decode_register_index_short(unsigned long opcode, int offset)
+static int decode_register_index_short(unsigned long opcode, int offset)
{
return ((opcode >> offset) & 0x7) + 8;
}
/* Calculate the new address for after a step */
-int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
+static int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
{
unsigned long pc = regs->epc;
unsigned long *regs_ptr = (unsigned long *)regs;
unsigned int rs1_num, rs2_num;
int op_code;
- if (probe_kernel_address((void *)pc, op_code))
+ if (get_kernel_nofault(op_code, (void *)pc))
return -EINVAL;
if ((op_code & __INSN_LENGTH_MASK) != __INSN_LENGTH_GE_32) {
if (is_c_jalr_insn(op_code) || is_c_jr_insn(op_code)) {
@@ -136,7 +136,7 @@ int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
return 0;
}
-int do_single_step(struct pt_regs *regs)
+static int do_single_step(struct pt_regs *regs)
{
/* Determine where the target instruction will send us to */
unsigned long addr = 0;
@@ -146,14 +146,14 @@ int do_single_step(struct pt_regs *regs)
return error;
/* Store the op code in the stepped address */
- error = probe_kernel_address((void *)addr, stepped_opcode);
+ error = get_kernel_nofault(stepped_opcode, (void *)addr);
if (error)
return error;
stepped_address = addr;
/* Replace the op code with the break instruction */
- error = probe_kernel_write((void *)stepped_address,
+ error = copy_to_kernel_nofault((void *)stepped_address,
arch_kgdb_ops.gdb_bpt_instr,
BREAK_INSTR_SIZE);
/* Flush and return */
@@ -173,7 +173,7 @@ int do_single_step(struct pt_regs *regs)
static void undo_single_step(struct pt_regs *regs)
{
if (stepped_opcode != 0) {
- probe_kernel_write((void *)stepped_address,
+ copy_to_kernel_nofault((void *)stepped_address,
(void *)&stepped_opcode, BREAK_INSTR_SIZE);
flush_icache_range(stepped_address,
stepped_address + BREAK_INSTR_SIZE);
@@ -320,7 +320,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
return err;
}
-int kgdb_riscv_kgdbbreak(unsigned long addr)
+static int kgdb_riscv_kgdbbreak(unsigned long addr)
{
if (stepped_address == addr)
return KGDB_SW_SINGLE_STEP;
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
new file mode 100644
index 000000000000..df8e24559035
--- /dev/null
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
+ */
+
+#include <linux/kexec.h>
+#include <asm/kexec.h> /* For riscv_kexec_* symbol defines */
+#include <linux/smp.h> /* For smp_send_stop () */
+#include <asm/cacheflush.h> /* For local_flush_icache_all() */
+#include <asm/barrier.h> /* For smp_wmb() */
+#include <asm/page.h> /* For PAGE_MASK */
+#include <linux/libfdt.h> /* For fdt_check_header() */
+#include <asm/set_memory.h> /* For set_memory_x() */
+#include <linux/compiler.h> /* For unreachable() */
+#include <linux/cpu.h> /* For cpu_down() */
+#include <linux/reboot.h>
+
+/*
+ * kexec_image_info - Print received image details
+ */
+static void
+kexec_image_info(const struct kimage *image)
+{
+ unsigned long i;
+
+ pr_debug("Kexec image info:\n");
+ pr_debug("\ttype: %d\n", image->type);
+ pr_debug("\tstart: %lx\n", image->start);
+ pr_debug("\thead: %lx\n", image->head);
+ pr_debug("\tnr_segments: %lu\n", image->nr_segments);
+
+ for (i = 0; i < image->nr_segments; i++) {
+ pr_debug("\t segment[%lu]: %016lx - %016lx", i,
+ image->segment[i].mem,
+ image->segment[i].mem + image->segment[i].memsz);
+ pr_debug("\t\t0x%lx bytes, %lu pages\n",
+ (unsigned long) image->segment[i].memsz,
+ (unsigned long) image->segment[i].memsz / PAGE_SIZE);
+ }
+}
+
+/*
+ * machine_kexec_prepare - Initialize kexec
+ *
+ * This function is called from do_kexec_load, when the user has
+ * provided us with an image to be loaded. Its goal is to validate
+ * the image and prepare the control code buffer as needed.
+ * Note that kimage_alloc_init has already been called and the
+ * control buffer has already been allocated.
+ */
+int
+machine_kexec_prepare(struct kimage *image)
+{
+ struct kimage_arch *internal = &image->arch;
+ struct fdt_header fdt = {0};
+ void *control_code_buffer = NULL;
+ unsigned int control_code_buffer_sz = 0;
+ int i = 0;
+
+ kexec_image_info(image);
+
+ /* Find the Flattened Device Tree and save its physical address */
+ for (i = 0; i < image->nr_segments; i++) {
+ if (image->segment[i].memsz <= sizeof(fdt))
+ continue;
+
+ if (image->file_mode)
+ memcpy(&fdt, image->segment[i].buf, sizeof(fdt));
+ else if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt)))
+ continue;
+
+ if (fdt_check_header(&fdt))
+ continue;
+
+ internal->fdt_addr = (unsigned long) image->segment[i].mem;
+ break;
+ }
+
+ if (!internal->fdt_addr) {
+ pr_err("Device tree not included in the provided image\n");
+ return -EINVAL;
+ }
+
+ /* Copy the assembler code for relocation to the control page */
+ if (image->type != KEXEC_TYPE_CRASH) {
+ control_code_buffer = page_address(image->control_code_page);
+ control_code_buffer_sz = page_size(image->control_code_page);
+
+ if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) {
+ pr_err("Relocation code doesn't fit within a control page\n");
+ return -EINVAL;
+ }
+
+ memcpy(control_code_buffer, riscv_kexec_relocate,
+ riscv_kexec_relocate_size);
+
+ /* Mark the control page executable */
+ set_memory_x((unsigned long) control_code_buffer, 1);
+ }
+
+ return 0;
+}
+
+
+/*
+ * machine_kexec_cleanup - Cleanup any leftovers from
+ * machine_kexec_prepare
+ *
+ * This function is called by kimage_free to handle any arch-specific
+ * allocations done on machine_kexec_prepare. Since we didn't do any
+ * allocations there, this is just an empty function. Note that the
+ * control buffer is freed by kimage_free.
+ */
+void
+machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+
+/*
+ * machine_shutdown - Prepare for a kexec reboot
+ *
+ * This function is called by kernel_kexec just before machine_kexec
+ * below. Its goal is to prepare the rest of the system (the other
+ * harts and possibly devices etc) for a kexec reboot.
+ */
+void machine_shutdown(void)
+{
+ /*
+ * No more interrupts on this hart
+ * until we are back up.
+ */
+ local_irq_disable();
+
+#if defined(CONFIG_HOTPLUG_CPU)
+ smp_shutdown_nonboot_cpus(smp_processor_id());
+#endif
+}
+
+/*
+ * machine_crash_shutdown - Prepare to kexec after a kernel crash
+ *
+ * This function is called by crash_kexec just before machine_kexec
+ * below and its goal is similar to machine_shutdown, but in case of
+ * a kernel crash. Since we don't handle such cases yet, this function
+ * is empty.
+ */
+void
+machine_crash_shutdown(struct pt_regs *regs)
+{
+ crash_save_cpu(regs, smp_processor_id());
+ machine_shutdown();
+ pr_info("Starting crashdump kernel...\n");
+}
+
+/*
+ * machine_kexec - Jump to the loaded kimage
+ *
+ * This function is called by kernel_kexec which is called by the
+ * reboot system call when the reboot cmd is LINUX_REBOOT_CMD_KEXEC,
+ * or by crash_kernel which is called by the kernel's arch-specific
+ * trap handler in case of a kernel panic. It's the final stage of
+ * the kexec process where the pre-loaded kimage is ready to be
+ * executed. We assume at this point that all other harts are
+ * suspended and this hart will be the new boot hart.
+ */
+void __noreturn
+machine_kexec(struct kimage *image)
+{
+ struct kimage_arch *internal = &image->arch;
+ unsigned long jump_addr = (unsigned long) image->start;
+ unsigned long first_ind_entry = (unsigned long) &image->head;
+ unsigned long this_cpu_id = smp_processor_id();
+ unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
+ unsigned long fdt_addr = internal->fdt_addr;
+ void *control_code_buffer = page_address(image->control_code_page);
+ riscv_kexec_method kexec_method = NULL;
+
+ if (image->type != KEXEC_TYPE_CRASH)
+ kexec_method = control_code_buffer;
+ else
+ kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate;
+
+ pr_notice("Will call new kernel at %08lx from hart id %lx\n",
+ jump_addr, this_hart_id);
+ pr_notice("FDT image at %08lx\n", fdt_addr);
+
+ /* Make sure the relocation code is visible to the hart */
+ local_flush_icache_all();
+
+ /* Jump to the relocation code */
+ pr_notice("Bye...\n");
+ kexec_method(first_ind_entry, jump_addr, fdt_addr,
+ this_hart_id, kernel_map.va_pa_offset);
+ unreachable();
+}
diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c
new file mode 100644
index 000000000000..b0bf8c1722c0
--- /dev/null
+++ b/arch/riscv/kernel/machine_kexec_file.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kexec_file for riscv, use vmlinux as the dump-capture kernel image.
+ *
+ * Copyright (C) 2021 Huawei Technologies Co, Ltd.
+ *
+ * Author: Liao Chang (liaochang1@huawei.com)
+ */
+#include <linux/kexec.h>
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+ &elf_kexec_ops,
+ NULL
+};
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index 35a6ed76cb8b..d171eca623b6 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -13,224 +13,186 @@
.text
- .macro SAVE_ABI_STATE
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- addi sp, sp, -48
- sd s0, 32(sp)
- sd ra, 40(sp)
- addi s0, sp, 48
- sd t0, 24(sp)
- sd t1, 16(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- sd t2, 8(sp)
-#endif
-#else
- addi sp, sp, -16
- sd s0, 0(sp)
- sd ra, 8(sp)
- addi s0, sp, 16
-#endif
+#define FENTRY_RA_OFFSET 12
+#define ABI_SIZE_ON_STACK 72
+#define ABI_A0 0
+#define ABI_A1 8
+#define ABI_A2 16
+#define ABI_A3 24
+#define ABI_A4 32
+#define ABI_A5 40
+#define ABI_A6 48
+#define ABI_A7 56
+#define ABI_RA 64
+
+ .macro SAVE_ABI
+ addi sp, sp, -SZREG
+ addi sp, sp, -ABI_SIZE_ON_STACK
+
+ REG_S a0, ABI_A0(sp)
+ REG_S a1, ABI_A1(sp)
+ REG_S a2, ABI_A2(sp)
+ REG_S a3, ABI_A3(sp)
+ REG_S a4, ABI_A4(sp)
+ REG_S a5, ABI_A5(sp)
+ REG_S a6, ABI_A6(sp)
+ REG_S a7, ABI_A7(sp)
+ REG_S ra, ABI_RA(sp)
.endm
- .macro RESTORE_ABI_STATE
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- ld s0, 32(sp)
- ld ra, 40(sp)
- addi sp, sp, 48
-#else
- ld ra, 8(sp)
- ld s0, 0(sp)
- addi sp, sp, 16
-#endif
+ .macro RESTORE_ABI
+ REG_L a0, ABI_A0(sp)
+ REG_L a1, ABI_A1(sp)
+ REG_L a2, ABI_A2(sp)
+ REG_L a3, ABI_A3(sp)
+ REG_L a4, ABI_A4(sp)
+ REG_L a5, ABI_A5(sp)
+ REG_L a6, ABI_A6(sp)
+ REG_L a7, ABI_A7(sp)
+ REG_L ra, ABI_RA(sp)
+
+ addi sp, sp, ABI_SIZE_ON_STACK
+ addi sp, sp, SZREG
.endm
- .macro RESTORE_GRAPH_ARGS
- ld a0, 24(sp)
- ld a1, 16(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- ld a2, 8(sp)
-#endif
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ .macro SAVE_ALL
+ addi sp, sp, -SZREG
+ addi sp, sp, -PT_SIZE_ON_STACK
+
+ REG_S x1, PT_EPC(sp)
+ addi sp, sp, PT_SIZE_ON_STACK
+ REG_L x1, (sp)
+ addi sp, sp, -PT_SIZE_ON_STACK
+ REG_S x1, PT_RA(sp)
+ REG_L x1, PT_EPC(sp)
+
+ REG_S x2, PT_SP(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x4, PT_TP(sp)
+ REG_S x5, PT_T0(sp)
+ REG_S x6, PT_T1(sp)
+ REG_S x7, PT_T2(sp)
+ REG_S x8, PT_S0(sp)
+ REG_S x9, PT_S1(sp)
+ REG_S x10, PT_A0(sp)
+ REG_S x11, PT_A1(sp)
+ REG_S x12, PT_A2(sp)
+ REG_S x13, PT_A3(sp)
+ REG_S x14, PT_A4(sp)
+ REG_S x15, PT_A5(sp)
+ REG_S x16, PT_A6(sp)
+ REG_S x17, PT_A7(sp)
+ REG_S x18, PT_S2(sp)
+ REG_S x19, PT_S3(sp)
+ REG_S x20, PT_S4(sp)
+ REG_S x21, PT_S5(sp)
+ REG_S x22, PT_S6(sp)
+ REG_S x23, PT_S7(sp)
+ REG_S x24, PT_S8(sp)
+ REG_S x25, PT_S9(sp)
+ REG_S x26, PT_S10(sp)
+ REG_S x27, PT_S11(sp)
+ REG_S x28, PT_T3(sp)
+ REG_S x29, PT_T4(sp)
+ REG_S x30, PT_T5(sp)
+ REG_S x31, PT_T6(sp)
.endm
-ENTRY(ftrace_graph_caller)
- addi sp, sp, -16
- sd s0, 0(sp)
- sd ra, 8(sp)
- addi s0, sp, 16
-ftrace_graph_call:
- .global ftrace_graph_call
- /*
- * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the
- * call below. Check ftrace_modify_all_code for details.
- */
- call ftrace_stub
- ld ra, 8(sp)
- ld s0, 0(sp)
- addi sp, sp, 16
- ret
-ENDPROC(ftrace_graph_caller)
+ .macro RESTORE_ALL
+ REG_L x1, PT_RA(sp)
+ addi sp, sp, PT_SIZE_ON_STACK
+ REG_S x1, (sp)
+ addi sp, sp, -PT_SIZE_ON_STACK
+ REG_L x1, PT_EPC(sp)
+ REG_L x2, PT_SP(sp)
+ REG_L x3, PT_GP(sp)
+ REG_L x4, PT_TP(sp)
+ REG_L x5, PT_T0(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+ REG_L x9, PT_S1(sp)
+ REG_L x10, PT_A0(sp)
+ REG_L x11, PT_A1(sp)
+ REG_L x12, PT_A2(sp)
+ REG_L x13, PT_A3(sp)
+ REG_L x14, PT_A4(sp)
+ REG_L x15, PT_A5(sp)
+ REG_L x16, PT_A6(sp)
+ REG_L x17, PT_A7(sp)
+ REG_L x18, PT_S2(sp)
+ REG_L x19, PT_S3(sp)
+ REG_L x20, PT_S4(sp)
+ REG_L x21, PT_S5(sp)
+ REG_L x22, PT_S6(sp)
+ REG_L x23, PT_S7(sp)
+ REG_L x24, PT_S8(sp)
+ REG_L x25, PT_S9(sp)
+ REG_L x26, PT_S10(sp)
+ REG_L x27, PT_S11(sp)
+ REG_L x28, PT_T3(sp)
+ REG_L x29, PT_T4(sp)
+ REG_L x30, PT_T5(sp)
+ REG_L x31, PT_T6(sp)
+
+ addi sp, sp, PT_SIZE_ON_STACK
+ addi sp, sp, SZREG
+ .endm
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
ENTRY(ftrace_caller)
- /*
- * a0: the address in the caller when calling ftrace_caller
- * a1: the caller's return address
- * a2: the address of global variable function_trace_op
- */
- ld a1, -8(s0)
- addi a0, ra, -MCOUNT_INSN_SIZE
- la t5, function_trace_op
- ld a2, 0(t5)
+ SAVE_ABI
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /*
- * the graph tracer (specifically, prepare_ftrace_return) needs these
- * arguments but for now the function tracer occupies the regs, so we
- * save them in temporary regs to recover later.
- */
- addi t0, s0, -8
- mv t1, a0
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- ld t2, -16(s0)
-#endif
-#endif
+ addi a0, ra, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+ REG_L a1, ABI_SIZE_ON_STACK(sp)
+ mv a3, sp
- SAVE_ABI_STATE
ftrace_call:
.global ftrace_call
- /*
- * For the dynamic ftrace to work, here we should reserve at least
- * 8 bytes for a functional auipc-jalr pair. The following call
- * serves this purpose.
- *
- * Calling ftrace_update_ftrace_func would overwrite the nops below.
- * Check ftrace_modify_all_code for details.
- */
call ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- RESTORE_GRAPH_ARGS
- call ftrace_graph_caller
+ addi a0, sp, ABI_SIZE_ON_STACK
+ REG_L a1, ABI_RA(sp)
+ addi a1, a1, -FENTRY_RA_OFFSET
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv a2, s0
#endif
-
- RESTORE_ABI_STATE
+ftrace_graph_call:
+ .global ftrace_graph_call
+ call ftrace_stub
+#endif
+ RESTORE_ABI
ret
ENDPROC(ftrace_caller)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- .macro SAVE_ALL
- addi sp, sp, -(PT_SIZE_ON_STACK+16)
- sd s0, (PT_SIZE_ON_STACK)(sp)
- sd ra, (PT_SIZE_ON_STACK+8)(sp)
- addi s0, sp, (PT_SIZE_ON_STACK+16)
-
- sd x1, PT_RA(sp)
- sd x2, PT_SP(sp)
- sd x3, PT_GP(sp)
- sd x4, PT_TP(sp)
- sd x5, PT_T0(sp)
- sd x6, PT_T1(sp)
- sd x7, PT_T2(sp)
- sd x8, PT_S0(sp)
- sd x9, PT_S1(sp)
- sd x10, PT_A0(sp)
- sd x11, PT_A1(sp)
- sd x12, PT_A2(sp)
- sd x13, PT_A3(sp)
- sd x14, PT_A4(sp)
- sd x15, PT_A5(sp)
- sd x16, PT_A6(sp)
- sd x17, PT_A7(sp)
- sd x18, PT_S2(sp)
- sd x19, PT_S3(sp)
- sd x20, PT_S4(sp)
- sd x21, PT_S5(sp)
- sd x22, PT_S6(sp)
- sd x23, PT_S7(sp)
- sd x24, PT_S8(sp)
- sd x25, PT_S9(sp)
- sd x26, PT_S10(sp)
- sd x27, PT_S11(sp)
- sd x28, PT_T3(sp)
- sd x29, PT_T4(sp)
- sd x30, PT_T5(sp)
- sd x31, PT_T6(sp)
- .endm
-
- .macro RESTORE_ALL
- ld x1, PT_RA(sp)
- ld x2, PT_SP(sp)
- ld x3, PT_GP(sp)
- ld x4, PT_TP(sp)
- ld x5, PT_T0(sp)
- ld x6, PT_T1(sp)
- ld x7, PT_T2(sp)
- ld x8, PT_S0(sp)
- ld x9, PT_S1(sp)
- ld x10, PT_A0(sp)
- ld x11, PT_A1(sp)
- ld x12, PT_A2(sp)
- ld x13, PT_A3(sp)
- ld x14, PT_A4(sp)
- ld x15, PT_A5(sp)
- ld x16, PT_A6(sp)
- ld x17, PT_A7(sp)
- ld x18, PT_S2(sp)
- ld x19, PT_S3(sp)
- ld x20, PT_S4(sp)
- ld x21, PT_S5(sp)
- ld x22, PT_S6(sp)
- ld x23, PT_S7(sp)
- ld x24, PT_S8(sp)
- ld x25, PT_S9(sp)
- ld x26, PT_S10(sp)
- ld x27, PT_S11(sp)
- ld x28, PT_T3(sp)
- ld x29, PT_T4(sp)
- ld x30, PT_T5(sp)
- ld x31, PT_T6(sp)
-
- ld s0, (PT_SIZE_ON_STACK)(sp)
- ld ra, (PT_SIZE_ON_STACK+8)(sp)
- addi sp, sp, (PT_SIZE_ON_STACK+16)
- .endm
-
- .macro RESTORE_GRAPH_REG_ARGS
- ld a0, PT_T0(sp)
- ld a1, PT_T1(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- ld a2, PT_T2(sp)
-#endif
- .endm
-
-/*
- * Most of the contents are the same as ftrace_caller.
- */
ENTRY(ftrace_regs_caller)
- /*
- * a3: the address of all registers in the stack
- */
- ld a1, -8(s0)
- addi a0, ra, -MCOUNT_INSN_SIZE
- la t5, function_trace_op
- ld a2, 0(t5)
- addi a3, sp, -(PT_SIZE_ON_STACK+16)
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- addi t0, s0, -8
- mv t1, a0
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- ld t2, -16(s0)
-#endif
-#endif
SAVE_ALL
+ addi a0, ra, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+ REG_L a1, PT_SIZE_ON_STACK(sp)
+ mv a3, sp
+
ftrace_regs_call:
.global ftrace_regs_call
call ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- RESTORE_GRAPH_REG_ARGS
- call ftrace_graph_caller
+ addi a0, sp, PT_RA
+ REG_L a1, PT_EPC(sp)
+ addi a1, a1, -FENTRY_RA_OFFSET
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv a2, s0
+#endif
+ftrace_graph_regs_call:
+ .global ftrace_graph_regs_call
+ call ftrace_stub
#endif
RESTORE_ALL
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index 8a5593ff9ff3..6d462681c9c0 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -47,8 +47,8 @@
ENTRY(ftrace_stub)
#ifdef CONFIG_DYNAMIC_FTRACE
- .global _mcount
- .set _mcount, ftrace_stub
+ .global MCOUNT_NAME
+ .set MCOUNT_NAME, ftrace_stub
#endif
ret
ENDPROC(ftrace_stub)
@@ -78,7 +78,7 @@ ENDPROC(return_to_handler)
#endif
#ifndef CONFIG_DYNAMIC_FTRACE
-ENTRY(_mcount)
+ENTRY(MCOUNT_NAME)
la t4, ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
la t0, ftrace_graph_return
@@ -124,6 +124,6 @@ do_trace:
jalr t5
RESTORE_ABI_STATE
ret
-ENDPROC(_mcount)
+ENDPROC(MCOUNT_NAME)
#endif
-EXPORT_SYMBOL(_mcount)
+EXPORT_SYMBOL(MCOUNT_NAME)
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 7191342c54da..91fe16bfaa07 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -11,8 +11,22 @@
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/pgtable.h>
+#include <asm/alternative.h>
#include <asm/sections.h>
+/*
+ * The auipc+jalr instruction pair can reach any PC-relative offset
+ * in the range [-2^31 - 2^11, 2^31 - 2^11)
+ */
+static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
+{
+#ifdef CONFIG_32BIT
+ return true;
+#else
+ return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11));
+#endif
+}
+
static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
{
if (v != (u32)v) {
@@ -56,7 +70,7 @@ static int apply_r_riscv_jal_rela(struct module *me, u32 *location,
return 0;
}
-static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location,
+static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location,
Elf_Addr v)
{
ptrdiff_t offset = (void *)v - (void *)location;
@@ -95,7 +109,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
ptrdiff_t offset = (void *)v - (void *)location;
s32 hi20;
- if (offset != (s32)offset) {
+ if (!riscv_insn_valid_32bit_offset(offset)) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
me->name, (long long)v, location);
@@ -197,10 +211,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
Elf_Addr v)
{
ptrdiff_t offset = (void *)v - (void *)location;
- s32 fill_v = offset;
u32 hi20, lo12;
- if (offset != fill_v) {
+ if (!riscv_insn_valid_32bit_offset(offset)) {
/* Only emit the plt entry if offset over 32-bit range */
if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
offset = module_emit_plt_entry(me, v);
@@ -224,10 +237,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location,
Elf_Addr v)
{
ptrdiff_t offset = (void *)v - (void *)location;
- s32 fill_v = offset;
u32 hi20, lo12;
- if (offset != fill_v) {
+ if (!riscv_insn_valid_32bit_offset(offset)) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
me->name, (long long)v, location);
@@ -263,6 +275,13 @@ static int apply_r_riscv_add32_rela(struct module *me, u32 *location,
return 0;
}
+static int apply_r_riscv_add64_rela(struct module *me, u32 *location,
+ Elf_Addr v)
+{
+ *(u64 *)location += (u64)v;
+ return 0;
+}
+
static int apply_r_riscv_sub32_rela(struct module *me, u32 *location,
Elf_Addr v)
{
@@ -270,13 +289,20 @@ static int apply_r_riscv_sub32_rela(struct module *me, u32 *location,
return 0;
}
+static int apply_r_riscv_sub64_rela(struct module *me, u32 *location,
+ Elf_Addr v)
+{
+ *(u64 *)location -= (u64)v;
+ return 0;
+}
+
static int (*reloc_handlers_rela[]) (struct module *me, u32 *location,
Elf_Addr v) = {
[R_RISCV_32] = apply_r_riscv_32_rela,
[R_RISCV_64] = apply_r_riscv_64_rela,
[R_RISCV_BRANCH] = apply_r_riscv_branch_rela,
[R_RISCV_JAL] = apply_r_riscv_jal_rela,
- [R_RISCV_RVC_BRANCH] = apply_r_riscv_rcv_branch_rela,
+ [R_RISCV_RVC_BRANCH] = apply_r_riscv_rvc_branch_rela,
[R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela,
[R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela,
[R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela,
@@ -290,7 +316,9 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location,
[R_RISCV_RELAX] = apply_r_riscv_relax_rela,
[R_RISCV_ALIGN] = apply_r_riscv_align_rela,
[R_RISCV_ADD32] = apply_r_riscv_add32_rela,
+ [R_RISCV_ADD64] = apply_r_riscv_add64_rela,
[R_RISCV_SUB32] = apply_r_riscv_sub32_rela,
+ [R_RISCV_SUB64] = apply_r_riscv_sub64_rela,
};
int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
@@ -392,13 +420,39 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
}
#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
-#define VMALLOC_MODULE_START \
- max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START)
void *module_alloc(unsigned long size)
{
- return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START,
- VMALLOC_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ return __vmalloc_node_range(size, 1, MODULES_VADDR,
+ MODULES_END, GFP_KERNEL,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
#endif
+
+static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
+{
+ const Elf_Shdr *s, *se;
+ const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+ if (strcmp(name, secstrs + s->sh_name) == 0)
+ return s;
+ }
+
+ return NULL;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ const Elf_Shdr *s;
+
+ s = find_section(hdr, sechdrs, ".alternative");
+ if (s)
+ apply_module_alternatives((void *)s->sh_addr, s->sh_size);
+
+ return 0;
+}
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index d4a64dfed342..765004b60513 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -20,7 +20,12 @@ struct patch_insn {
};
#ifdef CONFIG_MMU
-static void *patch_map(void *addr, int fixmap)
+/*
+ * The fix_to_virt(, idx) needs a const value (not a dynamic variable of
+ * reg-a0) or BUILD_BUG_ON failed with "idx >= __end_of_fixed_addresses".
+ * So use '__always_inline' and 'const unsigned int fixmap' here.
+ */
+static __always_inline void *patch_map(void *addr, const unsigned int fixmap)
{
uintptr_t uintaddr = (uintptr_t) addr;
struct page *page;
@@ -37,7 +42,6 @@ static void *patch_map(void *addr, int fixmap)
return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
(uintaddr & ~PAGE_MASK));
}
-NOKPROBE_SYMBOL(patch_map);
static void patch_unmap(int fixmap)
{
@@ -63,7 +67,7 @@ static int patch_insn_write(void *addr, const void *insn, size_t len)
waddr = patch_map(addr, FIX_TEXT_POKE0);
- ret = probe_kernel_write(waddr, insn, len);
+ ret = copy_to_kernel_nofault(waddr, insn, len);
patch_unmap(FIX_TEXT_POKE0);
@@ -76,7 +80,7 @@ NOKPROBE_SYMBOL(patch_insn_write);
#else
static int patch_insn_write(void *addr, const void *insn, size_t len)
{
- return probe_kernel_write(addr, insn, len);
+ return copy_to_kernel_nofault(addr, insn, len);
}
NOKPROBE_SYMBOL(patch_insn_write);
#endif /* CONFIG_MMU */
@@ -100,7 +104,7 @@ static int patch_text_cb(void *data)
struct patch_insn *patch = data;
int ret = 0;
- if (atomic_inc_return(&patch->cpu_count) == 1) {
+ if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
ret =
patch_text_nosync(patch->addr, &patch->insn,
GET_INSN_LENGTH(patch->insn));
diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
index cf190197a22f..3348a61de7d9 100644
--- a/arch/riscv/kernel/perf_callchain.c
+++ b/arch/riscv/kernel/perf_callchain.c
@@ -4,11 +4,7 @@
#include <linux/perf_event.h>
#include <linux/uaccess.h>
-/* Kernel callchain */
-struct stackframe {
- unsigned long fp;
- unsigned long ra;
-};
+#include <asm/stacktrace.h>
/*
* Get the return address for a single stackframe and return a pointer to the
@@ -19,8 +15,8 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
{
struct stackframe buftail;
unsigned long ra = 0;
- unsigned long *user_frame_tail =
- (unsigned long *)(fp - sizeof(struct stackframe));
+ unsigned long __user *user_frame_tail =
+ (unsigned long __user *)(fp - sizeof(struct stackframe));
/* Check accessibility of one struct frame_tail beyond */
if (!access_ok(user_frame_tail, sizeof(buftail)))
@@ -62,10 +58,6 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
{
unsigned long fp = 0;
- /* RISC-V does not support perf in guest mode. */
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- return;
-
fp = regs->s0;
perf_callchain_store(entry, regs->epc);
@@ -74,21 +66,13 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
fp = user_backtrace(entry, fp, 0);
}
-bool fill_callchain(unsigned long pc, void *entry)
+static bool fill_callchain(void *entry, unsigned long pc)
{
- return perf_callchain_store(entry, pc);
+ return perf_callchain_store(entry, pc) == 0;
}
-void notrace walk_stackframe(struct task_struct *task,
- struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg);
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- /* RISC-V does not support perf in guest mode. */
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- pr_warn("RISC-V does not support perf in guest mode!");
- return;
- }
-
walk_stackframe(NULL, regs, fill_callchain, entry);
}
diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
deleted file mode 100644
index c835f0362d94..000000000000
--- a/arch/riscv/kernel/perf_event.c
+++ /dev/null
@@ -1,485 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2009 Jaswinder Singh Rajput
- * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
- * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- * Copyright (C) 2009 Google, Inc., Stephane Eranian
- * Copyright 2014 Tilera Corporation. All Rights Reserved.
- * Copyright (C) 2018 Andes Technology Corporation
- *
- * Perf_events support for RISC-V platforms.
- *
- * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough
- * functionality for perf event to fully work, this file provides
- * the very basic framework only.
- *
- * For platform portings, please check Documentations/riscv/pmu.txt.
- *
- * The Copyright line includes x86 and tile ones.
- */
-
-#include <linux/kprobes.h>
-#include <linux/kernel.h>
-#include <linux/kdebug.h>
-#include <linux/mutex.h>
-#include <linux/bitmap.h>
-#include <linux/irq.h>
-#include <linux/perf_event.h>
-#include <linux/atomic.h>
-#include <linux/of.h>
-#include <asm/perf_event.h>
-
-static const struct riscv_pmu *riscv_pmu __read_mostly;
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-/*
- * Hardware & cache maps and their methods
- */
-
-static const int riscv_hw_event_map[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE,
- [PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET,
- [PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP,
-};
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
-[PERF_COUNT_HW_CACHE_OP_MAX]
-[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
-};
-
-static int riscv_map_hw_event(u64 config)
-{
- if (config >= riscv_pmu->max_events)
- return -EINVAL;
-
- return riscv_pmu->hw_events[config];
-}
-
-static int riscv_map_cache_decode(u64 config, unsigned int *type,
- unsigned int *op, unsigned int *result)
-{
- return -ENOENT;
-}
-
-static int riscv_map_cache_event(u64 config)
-{
- unsigned int type, op, result;
- int err = -ENOENT;
- int code;
-
- err = riscv_map_cache_decode(config, &type, &op, &result);
- if (!riscv_pmu->cache_events || err)
- return err;
-
- if (type >= PERF_COUNT_HW_CACHE_MAX ||
- op >= PERF_COUNT_HW_CACHE_OP_MAX ||
- result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
- return -EINVAL;
-
- code = (*riscv_pmu->cache_events)[type][op][result];
- if (code == RISCV_OP_UNSUPP)
- return -EINVAL;
-
- return code;
-}
-
-/*
- * Low-level functions: reading/writing counters
- */
-
-static inline u64 read_counter(int idx)
-{
- u64 val = 0;
-
- switch (idx) {
- case RISCV_PMU_CYCLE:
- val = csr_read(CSR_CYCLE);
- break;
- case RISCV_PMU_INSTRET:
- val = csr_read(CSR_INSTRET);
- break;
- default:
- WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
- return -EINVAL;
- }
-
- return val;
-}
-
-static inline void write_counter(int idx, u64 value)
-{
- /* currently not supported */
- WARN_ON_ONCE(1);
-}
-
-/*
- * pmu->read: read and update the counter
- *
- * Other architectures' implementation often have a xxx_perf_event_update
- * routine, which can return counter values when called in the IRQ, but
- * return void when being called by the pmu->read method.
- */
-static void riscv_pmu_read(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 prev_raw_count, new_raw_count;
- u64 oldval;
- int idx = hwc->idx;
- u64 delta;
-
- do {
- prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = read_counter(idx);
-
- oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count);
- } while (oldval != prev_raw_count);
-
- /*
- * delta is the value to update the counter we maintain in the kernel.
- */
- delta = (new_raw_count - prev_raw_count) &
- ((1ULL << riscv_pmu->counter_width) - 1);
- local64_add(delta, &event->count);
- /*
- * Something like local64_sub(delta, &hwc->period_left) here is
- * needed if there is an interrupt for perf.
- */
-}
-
-/*
- * State transition functions:
- *
- * stop()/start() & add()/del()
- */
-
-/*
- * pmu->stop: stop the counter
- */
-static void riscv_pmu_stop(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- riscv_pmu->pmu->read(event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
-
-/*
- * pmu->start: start the event.
- */
-static void riscv_pmu_start(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- if (flags & PERF_EF_RELOAD) {
- WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-
- /*
- * Set the counter to the period to the next interrupt here,
- * if you have any.
- */
- }
-
- hwc->state = 0;
- perf_event_update_userpage(event);
-
- /*
- * Since we cannot write to counters, this serves as an initialization
- * to the delta-mechanism in pmu->read(); otherwise, the delta would be
- * wrong when pmu->read is called for the first time.
- */
- local64_set(&hwc->prev_count, read_counter(hwc->idx));
-}
-
-/*
- * pmu->add: add the event to PMU.
- */
-static int riscv_pmu_add(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
-
- if (cpuc->n_events == riscv_pmu->num_counters)
- return -ENOSPC;
-
- /*
- * We don't have general conunters, so no binding-event-to-counter
- * process here.
- *
- * Indexing using hwc->config generally not works, since config may
- * contain extra information, but here the only info we have in
- * hwc->config is the event index.
- */
- hwc->idx = hwc->config;
- cpuc->events[hwc->idx] = event;
- cpuc->n_events++;
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
- if (flags & PERF_EF_START)
- riscv_pmu->pmu->start(event, PERF_EF_RELOAD);
-
- return 0;
-}
-
-/*
- * pmu->del: delete the event from PMU.
- */
-static void riscv_pmu_del(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
-
- cpuc->events[hwc->idx] = NULL;
- cpuc->n_events--;
- riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
- perf_event_update_userpage(event);
-}
-
-/*
- * Interrupt: a skeletion for reference.
- */
-
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
-static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
-{
- return IRQ_NONE;
-}
-
-static int reserve_pmc_hardware(void)
-{
- int err = 0;
-
- mutex_lock(&pmc_reserve_mutex);
- if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) {
- err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq,
- IRQF_PERCPU, "riscv-base-perf", NULL);
- }
- mutex_unlock(&pmc_reserve_mutex);
-
- return err;
-}
-
-static void release_pmc_hardware(void)
-{
- mutex_lock(&pmc_reserve_mutex);
- if (riscv_pmu->irq >= 0)
- free_irq(riscv_pmu->irq, NULL);
- mutex_unlock(&pmc_reserve_mutex);
-}
-
-/*
- * Event Initialization/Finalization
- */
-
-static atomic_t riscv_active_events = ATOMIC_INIT(0);
-
-static void riscv_event_destroy(struct perf_event *event)
-{
- if (atomic_dec_return(&riscv_active_events) == 0)
- release_pmc_hardware();
-}
-
-static int riscv_event_init(struct perf_event *event)
-{
- struct perf_event_attr *attr = &event->attr;
- struct hw_perf_event *hwc = &event->hw;
- int err;
- int code;
-
- if (atomic_inc_return(&riscv_active_events) == 1) {
- err = reserve_pmc_hardware();
-
- if (err) {
- pr_warn("PMC hardware not available\n");
- atomic_dec(&riscv_active_events);
- return -EBUSY;
- }
- }
-
- switch (event->attr.type) {
- case PERF_TYPE_HARDWARE:
- code = riscv_pmu->map_hw_event(attr->config);
- break;
- case PERF_TYPE_HW_CACHE:
- code = riscv_pmu->map_cache_event(attr->config);
- break;
- case PERF_TYPE_RAW:
- return -EOPNOTSUPP;
- default:
- return -ENOENT;
- }
-
- event->destroy = riscv_event_destroy;
- if (code < 0) {
- event->destroy(event);
- return code;
- }
-
- /*
- * idx is set to -1 because the index of a general event should not be
- * decided until binding to some counter in pmu->add().
- *
- * But since we don't have such support, later in pmu->add(), we just
- * use hwc->config as the index instead.
- */
- hwc->config = code;
- hwc->idx = -1;
-
- return 0;
-}
-
-/*
- * Initialization
- */
-
-static struct pmu min_pmu = {
- .name = "riscv-base",
- .event_init = riscv_event_init,
- .add = riscv_pmu_add,
- .del = riscv_pmu_del,
- .start = riscv_pmu_start,
- .stop = riscv_pmu_stop,
- .read = riscv_pmu_read,
-};
-
-static const struct riscv_pmu riscv_base_pmu = {
- .pmu = &min_pmu,
- .max_events = ARRAY_SIZE(riscv_hw_event_map),
- .map_hw_event = riscv_map_hw_event,
- .hw_events = riscv_hw_event_map,
- .map_cache_event = riscv_map_cache_event,
- .cache_events = &riscv_cache_event_map,
- .counter_width = 63,
- .num_counters = RISCV_BASE_COUNTERS + 0,
- .handle_irq = &riscv_base_pmu_handle_irq,
-
- /* This means this PMU has no IRQ. */
- .irq = -1,
-};
-
-static const struct of_device_id riscv_pmu_of_ids[] = {
- {.compatible = "riscv,base-pmu", .data = &riscv_base_pmu},
- { /* sentinel value */ }
-};
-
-static int __init init_hw_perf_events(void)
-{
- struct device_node *node = of_find_node_by_type(NULL, "pmu");
- const struct of_device_id *of_id;
-
- riscv_pmu = &riscv_base_pmu;
-
- if (node) {
- of_id = of_match_node(riscv_pmu_of_ids, node);
-
- if (of_id)
- riscv_pmu = of_id->data;
- of_node_put(node);
- }
-
- perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW);
- return 0;
-}
-arch_initcall(init_hw_perf_events);
diff --git a/arch/riscv/kernel/perf_regs.c b/arch/riscv/kernel/perf_regs.c
index 04a38fbeb9c7..fd304a248de6 100644
--- a/arch/riscv/kernel/perf_regs.c
+++ b/arch/riscv/kernel/perf_regs.c
@@ -36,8 +36,7 @@ u64 perf_reg_abi(struct task_struct *task)
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile
new file mode 100644
index 000000000000..7f0840dcc31b
--- /dev/null
+++ b/arch/riscv/kernel/probes/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o
+obj-$(CONFIG_KPROBES) += kprobes_trampoline.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
+obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o
+CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c
new file mode 100644
index 000000000000..64f6183b4717
--- /dev/null
+++ b/arch/riscv/kernel/probes/decode-insn.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <asm/sections.h>
+
+#include "decode-insn.h"
+#include "simulate-insn.h"
+
+/* Return:
+ * INSN_REJECTED If instruction is one not allowed to kprobe,
+ * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
+ */
+enum probe_insn __kprobes
+riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api)
+{
+ probe_opcode_t insn = *addr;
+
+ /*
+ * Reject instructions list:
+ */
+ RISCV_INSN_REJECTED(system, insn);
+ RISCV_INSN_REJECTED(fence, insn);
+
+ /*
+ * Simulate instructions list:
+ * TODO: the REJECTED ones below need to be implemented
+ */
+#ifdef CONFIG_RISCV_ISA_C
+ RISCV_INSN_REJECTED(c_j, insn);
+ RISCV_INSN_REJECTED(c_jr, insn);
+ RISCV_INSN_REJECTED(c_jal, insn);
+ RISCV_INSN_REJECTED(c_jalr, insn);
+ RISCV_INSN_REJECTED(c_beqz, insn);
+ RISCV_INSN_REJECTED(c_bnez, insn);
+ RISCV_INSN_REJECTED(c_ebreak, insn);
+#endif
+
+ RISCV_INSN_SET_SIMULATE(jal, insn);
+ RISCV_INSN_SET_SIMULATE(jalr, insn);
+ RISCV_INSN_SET_SIMULATE(auipc, insn);
+ RISCV_INSN_SET_SIMULATE(branch, insn);
+
+ return INSN_GOOD;
+}
diff --git a/arch/riscv/kernel/probes/decode-insn.h b/arch/riscv/kernel/probes/decode-insn.h
new file mode 100644
index 000000000000..42269a7d676d
--- /dev/null
+++ b/arch/riscv/kernel/probes/decode-insn.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _RISCV_KERNEL_KPROBES_DECODE_INSN_H
+#define _RISCV_KERNEL_KPROBES_DECODE_INSN_H
+
+#include <asm/sections.h>
+#include <asm/kprobes.h>
+
+enum probe_insn {
+ INSN_REJECTED,
+ INSN_GOOD_NO_SLOT,
+ INSN_GOOD,
+};
+
+enum probe_insn __kprobes
+riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *asi);
+
+#endif /* _RISCV_KERNEL_KPROBES_DECODE_INSN_H */
diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c
new file mode 100644
index 000000000000..7142ec42e889
--- /dev/null
+++ b/arch/riscv/kernel/probes/ftrace.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kprobes.h>
+
+/* Ftrace callback handler for kprobes -- called under preepmt disabled */
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+ struct kprobe *p;
+ struct pt_regs *regs;
+ struct kprobe_ctlblk *kcb;
+ int bit;
+
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ return;
+
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto out;
+
+ regs = ftrace_get_regs(fregs);
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ unsigned long orig_ip = instruction_pointer(regs);
+
+ instruction_pointer_set(regs, ip);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
+ /*
+ * Emulate singlestep (and also recover regs->pc)
+ * as if there is a nop
+ */
+ instruction_pointer_set(regs,
+ (unsigned long)p->addr + MCOUNT_INSN_SIZE);
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ instruction_pointer_set(regs, orig_ip);
+ }
+
+ /*
+ * If pre_handler returns !0, it changes regs->pc. We have to
+ * skip emulating post_handler.
+ */
+ __this_cpu_write(current_kprobe, NULL);
+ }
+out:
+ ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.api.insn = NULL;
+ return 0;
+}
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
new file mode 100644
index 000000000000..e6e950b7cf32
--- /dev/null
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#define pr_fmt(fmt) "kprobes: " fmt
+
+#include <linux/kprobes.h>
+#include <linux/extable.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+#include <asm/ptrace.h>
+#include <linux/uaccess.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+#include <asm/bug.h>
+#include <asm/patch.h>
+
+#include "decode-insn.h"
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static void __kprobes
+post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
+
+static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
+{
+ unsigned long offset = GET_INSN_LENGTH(p->opcode);
+
+ p->ainsn.api.restore = (unsigned long)p->addr + offset;
+
+ patch_text(p->ainsn.api.insn, p->opcode);
+ patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset),
+ __BUG_INSN_32);
+}
+
+static void __kprobes arch_prepare_simulate(struct kprobe *p)
+{
+ p->ainsn.api.restore = 0;
+}
+
+static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (p->ainsn.api.handler)
+ p->ainsn.api.handler((u32)p->opcode,
+ (unsigned long)p->addr, regs);
+
+ post_kprobe_handler(p, kcb, regs);
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ unsigned long probe_addr = (unsigned long)p->addr;
+
+ if (probe_addr & 0x1)
+ return -EILSEQ;
+
+ /* copy instruction */
+ p->opcode = *p->addr;
+
+ /* decode instruction */
+ switch (riscv_probe_decode_insn(p->addr, &p->ainsn.api)) {
+ case INSN_REJECTED: /* insn not supported */
+ return -EINVAL;
+
+ case INSN_GOOD_NO_SLOT: /* insn need simulation */
+ p->ainsn.api.insn = NULL;
+ break;
+
+ case INSN_GOOD: /* instruction uses slot */
+ p->ainsn.api.insn = get_insn_slot();
+ if (!p->ainsn.api.insn)
+ return -ENOMEM;
+ break;
+ }
+
+ /* prepare the instruction */
+ if (p->ainsn.api.insn)
+ arch_prepare_ss_slot(p);
+ else
+ arch_prepare_simulate(p);
+
+ return 0;
+}
+
+#ifdef CONFIG_MMU
+void *alloc_insn_page(void)
+{
+ return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
+ VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+ __builtin_return_address(0));
+}
+#endif
+
+/* install breakpoint in text */
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ if ((p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32)
+ patch_text(p->addr, __BUG_INSN_32);
+ else
+ patch_text(p->addr, __BUG_INSN_16);
+}
+
+/* remove breakpoint from text */
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ patch_text(p->addr, p->opcode);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+ __this_cpu_write(current_kprobe, p);
+}
+
+/*
+ * Interrupts need to be disabled before single-step mode is set, and not
+ * reenabled until after single-step mode ends.
+ * Without disabling interrupt on local CPU, there is a chance of
+ * interrupt occurrence in the period of exception return and start of
+ * out-of-line single-step, that result in wrongly single stepping
+ * into the interrupt handler.
+ */
+static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs)
+{
+ kcb->saved_status = regs->status;
+ regs->status &= ~SR_SPIE;
+}
+
+static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs)
+{
+ regs->status = kcb->saved_status;
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb, int reenter)
+{
+ unsigned long slot;
+
+ if (reenter) {
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ } else {
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ }
+
+ if (p->ainsn.api.insn) {
+ /* prepare for single stepping */
+ slot = (unsigned long)p->ainsn.api.insn;
+
+ /* IRQs and single stepping do not mix well. */
+ kprobes_save_local_irqflag(kcb, regs);
+
+ instruction_pointer_set(regs, slot);
+ } else {
+ /* insn simulation */
+ arch_simulate_insn(p, regs);
+ }
+}
+
+static int __kprobes reenter_kprobe(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SSDONE:
+ case KPROBE_HIT_ACTIVE:
+ kprobes_inc_nmissed_count(p);
+ setup_singlestep(p, regs, kcb, 1);
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ pr_warn("Failed to recover from reentered kprobes.\n");
+ dump_kprobe(p);
+ BUG();
+ break;
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void __kprobes
+post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs)
+{
+ /* return addr restore if non-branching insn */
+ if (cur->ainsn.api.restore != 0)
+ regs->epc = cur->ainsn.api.restore;
+
+ /* restore back original saved kprobe variables and continue */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ return;
+ }
+
+ /* call post handler */
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ if (cur->post_handler) {
+ /* post_handler can hit breakpoint and single step
+ * again, so we enable D-flag for recursive exception.
+ */
+ cur->post_handler(cur, regs, 0);
+ }
+
+ reset_current_kprobe();
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the ip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ regs->epc = (unsigned long) cur->addr;
+ BUG_ON(!instruction_pointer(regs));
+
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else {
+ kprobes_restore_local_irqflag(kcb, regs);
+ reset_current_kprobe();
+ }
+
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ if (fixup_exception(regs))
+ return 1;
+ }
+ return 0;
+}
+
+bool __kprobes
+kprobe_breakpoint_handler(struct pt_regs *regs)
+{
+ struct kprobe *p, *cur_kprobe;
+ struct kprobe_ctlblk *kcb;
+ unsigned long addr = instruction_pointer(regs);
+
+ kcb = get_kprobe_ctlblk();
+ cur_kprobe = kprobe_running();
+
+ p = get_kprobe((kprobe_opcode_t *) addr);
+
+ if (p) {
+ if (cur_kprobe) {
+ if (reenter_kprobe(p, regs, kcb))
+ return true;
+ } else {
+ /* Probe hit */
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with normal processing. If we have a
+ * pre-handler and it returned non-zero, it will
+ * modify the execution path and no need to single
+ * stepping. Let's just reset current kprobe and exit.
+ *
+ * pre_handler can hit a breakpoint and can step thru
+ * before return.
+ */
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ setup_singlestep(p, regs, kcb, 0);
+ else
+ reset_current_kprobe();
+ }
+ return true;
+ }
+
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ * Return back to original instruction, and continue.
+ */
+ return false;
+}
+
+bool __kprobes
+kprobe_single_step_handler(struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long addr = instruction_pointer(regs);
+ struct kprobe *cur = kprobe_running();
+
+ if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) &&
+ ((unsigned long)&cur->ainsn.api.insn[0] + GET_INSN_LENGTH(cur->opcode) == addr)) {
+ kprobes_restore_local_irqflag(kcb, regs);
+ post_kprobe_handler(cur, kcb, regs);
+ return true;
+ }
+ /* not ours, kprobes should ignore it */
+ return false;
+}
+
+/*
+ * Provide a blacklist of symbols identifying ranges which cannot be kprobed.
+ * This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
+ */
+int __init arch_populate_kprobe_blacklist(void)
+{
+ int ret;
+
+ ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+ (unsigned long)__irqentry_text_end);
+ return ret;
+}
+
+void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
+{
+ return (void *)kretprobe_trampoline_handler(regs, NULL);
+}
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *)regs->ra;
+ ri->fp = NULL;
+ regs->ra = (unsigned long) &__kretprobe_trampoline;
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ return 0;
+}
+
+int __init arch_init_kprobes(void)
+{
+ return 0;
+}
diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/kprobes_trampoline.S
new file mode 100644
index 000000000000..7bdb09ded39b
--- /dev/null
+++ b/arch/riscv/kernel/probes/kprobes_trampoline.S
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Author: Patrick Stählin <me@packi.ch>
+ */
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+
+ .text
+ .altmacro
+
+ .macro save_all_base_regs
+ REG_S x1, PT_RA(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x4, PT_TP(sp)
+ REG_S x5, PT_T0(sp)
+ REG_S x6, PT_T1(sp)
+ REG_S x7, PT_T2(sp)
+ REG_S x8, PT_S0(sp)
+ REG_S x9, PT_S1(sp)
+ REG_S x10, PT_A0(sp)
+ REG_S x11, PT_A1(sp)
+ REG_S x12, PT_A2(sp)
+ REG_S x13, PT_A3(sp)
+ REG_S x14, PT_A4(sp)
+ REG_S x15, PT_A5(sp)
+ REG_S x16, PT_A6(sp)
+ REG_S x17, PT_A7(sp)
+ REG_S x18, PT_S2(sp)
+ REG_S x19, PT_S3(sp)
+ REG_S x20, PT_S4(sp)
+ REG_S x21, PT_S5(sp)
+ REG_S x22, PT_S6(sp)
+ REG_S x23, PT_S7(sp)
+ REG_S x24, PT_S8(sp)
+ REG_S x25, PT_S9(sp)
+ REG_S x26, PT_S10(sp)
+ REG_S x27, PT_S11(sp)
+ REG_S x28, PT_T3(sp)
+ REG_S x29, PT_T4(sp)
+ REG_S x30, PT_T5(sp)
+ REG_S x31, PT_T6(sp)
+ .endm
+
+ .macro restore_all_base_regs
+ REG_L x3, PT_GP(sp)
+ REG_L x4, PT_TP(sp)
+ REG_L x5, PT_T0(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+ REG_L x9, PT_S1(sp)
+ REG_L x10, PT_A0(sp)
+ REG_L x11, PT_A1(sp)
+ REG_L x12, PT_A2(sp)
+ REG_L x13, PT_A3(sp)
+ REG_L x14, PT_A4(sp)
+ REG_L x15, PT_A5(sp)
+ REG_L x16, PT_A6(sp)
+ REG_L x17, PT_A7(sp)
+ REG_L x18, PT_S2(sp)
+ REG_L x19, PT_S3(sp)
+ REG_L x20, PT_S4(sp)
+ REG_L x21, PT_S5(sp)
+ REG_L x22, PT_S6(sp)
+ REG_L x23, PT_S7(sp)
+ REG_L x24, PT_S8(sp)
+ REG_L x25, PT_S9(sp)
+ REG_L x26, PT_S10(sp)
+ REG_L x27, PT_S11(sp)
+ REG_L x28, PT_T3(sp)
+ REG_L x29, PT_T4(sp)
+ REG_L x30, PT_T5(sp)
+ REG_L x31, PT_T6(sp)
+ .endm
+
+ENTRY(__kretprobe_trampoline)
+ addi sp, sp, -(PT_SIZE_ON_STACK)
+ save_all_base_regs
+
+ move a0, sp /* pt_regs */
+
+ call trampoline_probe_handler
+
+ /* use the result as the return-address */
+ move ra, a0
+
+ restore_all_base_regs
+ addi sp, sp, PT_SIZE_ON_STACK
+
+ ret
+ENDPROC(__kretprobe_trampoline)
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
new file mode 100644
index 000000000000..d73e96f6ed7c
--- /dev/null
+++ b/arch/riscv/kernel/probes/simulate-insn.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+
+#include "decode-insn.h"
+#include "simulate-insn.h"
+
+static inline bool rv_insn_reg_get_val(struct pt_regs *regs, u32 index,
+ unsigned long *ptr)
+{
+ if (index == 0)
+ *ptr = 0;
+ else if (index <= 31)
+ *ptr = *((unsigned long *)regs + index);
+ else
+ return false;
+
+ return true;
+}
+
+static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index,
+ unsigned long val)
+{
+ if (index == 0)
+ return false;
+ else if (index <= 31)
+ *((unsigned long *)regs + index) = val;
+ else
+ return false;
+
+ return true;
+}
+
+bool __kprobes simulate_jal(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ /*
+ * 31 30 21 20 19 12 11 7 6 0
+ * imm [20] | imm[10:1] | imm[11] | imm[19:12] | rd | opcode
+ * 1 10 1 8 5 JAL/J
+ */
+ bool ret;
+ u32 imm;
+ u32 index = (opcode >> 7) & 0x1f;
+
+ ret = rv_insn_reg_set_val(regs, index, addr + 4);
+ if (!ret)
+ return ret;
+
+ imm = ((opcode >> 21) & 0x3ff) << 1;
+ imm |= ((opcode >> 20) & 0x1) << 11;
+ imm |= ((opcode >> 12) & 0xff) << 12;
+ imm |= ((opcode >> 31) & 0x1) << 20;
+
+ instruction_pointer_set(regs, addr + sign_extend32((imm), 20));
+
+ return ret;
+}
+
+bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ /*
+ * 31 20 19 15 14 12 11 7 6 0
+ * offset[11:0] | rs1 | 010 | rd | opcode
+ * 12 5 3 5 JALR/JR
+ */
+ bool ret;
+ unsigned long base_addr;
+ u32 imm = (opcode >> 20) & 0xfff;
+ u32 rd_index = (opcode >> 7) & 0x1f;
+ u32 rs1_index = (opcode >> 15) & 0x1f;
+
+ ret = rv_insn_reg_set_val(regs, rd_index, addr + 4);
+ if (!ret)
+ return ret;
+
+ ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr);
+ if (!ret)
+ return ret;
+
+ instruction_pointer_set(regs, (base_addr + sign_extend32((imm), 11))&~1);
+
+ return ret;
+}
+
+#define auipc_rd_idx(opcode) \
+ ((opcode >> 7) & 0x1f)
+
+#define auipc_imm(opcode) \
+ ((((opcode) >> 12) & 0xfffff) << 12)
+
+#if __riscv_xlen == 64
+#define auipc_offset(opcode) sign_extend64(auipc_imm(opcode), 31)
+#elif __riscv_xlen == 32
+#define auipc_offset(opcode) auipc_imm(opcode)
+#else
+#error "Unexpected __riscv_xlen"
+#endif
+
+bool __kprobes simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ /*
+ * auipc instruction:
+ * 31 12 11 7 6 0
+ * | imm[31:12] | rd | opcode |
+ * 20 5 7
+ */
+
+ u32 rd_idx = auipc_rd_idx(opcode);
+ unsigned long rd_val = addr + auipc_offset(opcode);
+
+ if (!rv_insn_reg_set_val(regs, rd_idx, rd_val))
+ return false;
+
+ instruction_pointer_set(regs, addr + 4);
+
+ return true;
+}
+
+#define branch_rs1_idx(opcode) \
+ (((opcode) >> 15) & 0x1f)
+
+#define branch_rs2_idx(opcode) \
+ (((opcode) >> 20) & 0x1f)
+
+#define branch_funct3(opcode) \
+ (((opcode) >> 12) & 0x7)
+
+#define branch_imm(opcode) \
+ (((((opcode) >> 8) & 0xf ) << 1) | \
+ ((((opcode) >> 25) & 0x3f) << 5) | \
+ ((((opcode) >> 7) & 0x1 ) << 11) | \
+ ((((opcode) >> 31) & 0x1 ) << 12))
+
+#define branch_offset(opcode) \
+ sign_extend32((branch_imm(opcode)), 12)
+
+#define BRANCH_BEQ 0x0
+#define BRANCH_BNE 0x1
+#define BRANCH_BLT 0x4
+#define BRANCH_BGE 0x5
+#define BRANCH_BLTU 0x6
+#define BRANCH_BGEU 0x7
+
+bool __kprobes simulate_branch(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ /*
+ * branch instructions:
+ * 31 30 25 24 20 19 15 14 12 11 8 7 6 0
+ * | imm[12] | imm[10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode |
+ * 1 6 5 5 3 4 1 7
+ * imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ
+ * imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE
+ * imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT
+ * imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE
+ * imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU
+ * imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU
+ */
+
+ s32 offset;
+ s32 offset_tmp;
+ unsigned long rs1_val;
+ unsigned long rs2_val;
+
+ if (!rv_insn_reg_get_val(regs, branch_rs1_idx(opcode), &rs1_val) ||
+ !rv_insn_reg_get_val(regs, branch_rs2_idx(opcode), &rs2_val))
+ return false;
+
+ offset_tmp = branch_offset(opcode);
+ switch (branch_funct3(opcode)) {
+ case BRANCH_BEQ:
+ offset = (rs1_val == rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BNE:
+ offset = (rs1_val != rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BLT:
+ offset = ((long)rs1_val < (long)rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BGE:
+ offset = ((long)rs1_val >= (long)rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BLTU:
+ offset = (rs1_val < rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BGEU:
+ offset = (rs1_val >= rs2_val) ? offset_tmp : 4;
+ break;
+ default:
+ return false;
+ }
+
+ instruction_pointer_set(regs, addr + offset);
+
+ return true;
+}
diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h
new file mode 100644
index 000000000000..cb6ff7dccb92
--- /dev/null
+++ b/arch/riscv/kernel/probes/simulate-insn.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _RISCV_KERNEL_PROBES_SIMULATE_INSN_H
+#define _RISCV_KERNEL_PROBES_SIMULATE_INSN_H
+
+#define __RISCV_INSN_FUNCS(name, mask, val) \
+static __always_inline bool riscv_insn_is_##name(probe_opcode_t code) \
+{ \
+ BUILD_BUG_ON(~(mask) & (val)); \
+ return (code & (mask)) == (val); \
+} \
+bool simulate_##name(u32 opcode, unsigned long addr, \
+ struct pt_regs *regs)
+
+#define RISCV_INSN_REJECTED(name, code) \
+ do { \
+ if (riscv_insn_is_##name(code)) { \
+ return INSN_REJECTED; \
+ } \
+ } while (0)
+
+__RISCV_INSN_FUNCS(system, 0x7f, 0x73);
+__RISCV_INSN_FUNCS(fence, 0x7f, 0x0f);
+
+#define RISCV_INSN_SET_SIMULATE(name, code) \
+ do { \
+ if (riscv_insn_is_##name(code)) { \
+ api->handler = simulate_##name; \
+ return INSN_GOOD_NO_SLOT; \
+ } \
+ } while (0)
+
+__RISCV_INSN_FUNCS(c_j, 0xe003, 0xa001);
+__RISCV_INSN_FUNCS(c_jr, 0xf007, 0x8002);
+__RISCV_INSN_FUNCS(c_jal, 0xe003, 0x2001);
+__RISCV_INSN_FUNCS(c_jalr, 0xf007, 0x9002);
+__RISCV_INSN_FUNCS(c_beqz, 0xe003, 0xc001);
+__RISCV_INSN_FUNCS(c_bnez, 0xe003, 0xe001);
+__RISCV_INSN_FUNCS(c_ebreak, 0xffff, 0x9002);
+
+__RISCV_INSN_FUNCS(auipc, 0x7f, 0x17);
+__RISCV_INSN_FUNCS(branch, 0x7f, 0x63);
+
+__RISCV_INSN_FUNCS(jal, 0x7f, 0x6f);
+__RISCV_INSN_FUNCS(jalr, 0x707f, 0x67);
+
+#endif /* _RISCV_KERNEL_PROBES_SIMULATE_INSN_H */
diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c
new file mode 100644
index 000000000000..7a057b5f0adc
--- /dev/null
+++ b/arch/riscv/kernel/probes/uprobes.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/highmem.h>
+#include <linux/ptrace.h>
+#include <linux/uprobes.h>
+
+#include "decode-insn.h"
+
+#define UPROBE_TRAP_NR UINT_MAX
+
+bool is_swbp_insn(uprobe_opcode_t *insn)
+{
+#ifdef CONFIG_RISCV_ISA_C
+ return (*insn & 0xffff) == UPROBE_SWBP_INSN;
+#else
+ return *insn == UPROBE_SWBP_INSN;
+#endif
+}
+
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+ return instruction_pointer(regs);
+}
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ unsigned long addr)
+{
+ probe_opcode_t opcode;
+
+ opcode = *(probe_opcode_t *)(&auprobe->insn[0]);
+
+ auprobe->insn_size = GET_INSN_LENGTH(opcode);
+
+ switch (riscv_probe_decode_insn(&opcode, &auprobe->api)) {
+ case INSN_REJECTED:
+ return -EINVAL;
+
+ case INSN_GOOD_NO_SLOT:
+ auprobe->simulate = true;
+ break;
+
+ case INSN_GOOD:
+ auprobe->simulate = false;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ utask->autask.saved_cause = current->thread.bad_cause;
+ current->thread.bad_cause = UPROBE_TRAP_NR;
+
+ instruction_pointer_set(regs, utask->xol_vaddr);
+
+ regs->status &= ~SR_SPIE;
+
+ return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ WARN_ON_ONCE(current->thread.bad_cause != UPROBE_TRAP_NR);
+
+ instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size);
+
+ regs->status |= SR_SPIE;
+
+ return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+ if (t->thread.bad_cause != UPROBE_TRAP_NR)
+ return true;
+
+ return false;
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ probe_opcode_t insn;
+ unsigned long addr;
+
+ if (!auprobe->simulate)
+ return false;
+
+ insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ addr = instruction_pointer(regs);
+
+ if (auprobe->api.handler)
+ auprobe->api.handler(insn, addr, regs);
+
+ return true;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ /*
+ * Task has received a fatal signal, so reset back to probbed
+ * address.
+ */
+ instruction_pointer_set(regs, utask->vaddr);
+
+ regs->status &= ~SR_SPIE;
+}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return regs->sp <= ret->stack;
+ else
+ return regs->sp < ret->stack;
+}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
+ struct pt_regs *regs)
+{
+ unsigned long ra;
+
+ ra = regs->ra;
+
+ regs->ra = trampoline_vaddr;
+
+ return ra;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ return NOTIFY_DONE;
+}
+
+bool uprobe_breakpoint_handler(struct pt_regs *regs)
+{
+ if (uprobe_pre_sstep_notifier(regs))
+ return true;
+
+ return false;
+}
+
+bool uprobe_single_step_handler(struct pt_regs *regs)
+{
+ if (uprobe_post_sstep_notifier(regs))
+ return true;
+
+ return false;
+}
+
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+ void *src, unsigned long len)
+{
+ /* Initialize the slot */
+ void *kaddr = kmap_atomic(page);
+ void *dst = kaddr + (vaddr & ~PAGE_MASK);
+
+ memcpy(dst, src, len);
+
+ /* Add ebreak behind opcode to simulate singlestep */
+ if (vaddr) {
+ dst += GET_INSN_LENGTH(*(probe_opcode_t *)src);
+ *(uprobe_opcode_t *)dst = __BUG_INSN_32;
+ }
+
+ kunmap_atomic(kaddr);
+
+ /*
+ * We probably need flush_icache_user_page() but it needs vma.
+ * This should work on most of architectures by default. If
+ * architecture needs to do something different it can define
+ * its own version of the function.
+ */
+ flush_dcache_page(page);
+}
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 824d117cf202..ceb9ebab6558 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -10,6 +10,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/tick.h>
#include <linux/ptrace.h>
@@ -18,26 +19,39 @@
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/csr.h>
+#include <asm/stacktrace.h>
#include <asm/string.h>
#include <asm/switch_to.h>
#include <asm/thread_info.h>
+#include <asm/cpuidle.h>
register unsigned long gp_in_global __asm__("gp");
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
+#include <linux/stackprotector.h>
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
extern asmlinkage void ret_from_fork(void);
extern asmlinkage void ret_from_kernel_thread(void);
void arch_cpu_idle(void)
{
- wait_for_interrupt();
- local_irq_enable();
+ cpu_do_idle();
+ raw_local_irq_enable();
}
-void show_regs(struct pt_regs *regs)
+void __show_regs(struct pt_regs *regs)
{
show_regs_print_info(KERN_DEFAULT);
- pr_cont("epc: " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
+ if (!user_mode(regs)) {
+ pr_cont("epc : %pS\n", (void *)regs->epc);
+ pr_cont(" ra : %pS\n", (void *)regs->ra);
+ }
+
+ pr_cont("epc : " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
regs->epc, regs->ra, regs->sp);
pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
regs->gp, regs->tp, regs->t0);
@@ -63,12 +77,46 @@ void show_regs(struct pt_regs *regs)
pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
regs->status, regs->badaddr, regs->cause);
}
+void show_regs(struct pt_regs *regs)
+{
+ __show_regs(regs);
+ if (!user_mode(regs))
+ dump_backtrace(regs, NULL, KERN_DEFAULT);
+}
+
+#ifdef CONFIG_COMPAT
+static bool compat_mode_supported __read_mostly;
+
+bool compat_elf_check_arch(Elf32_Ehdr *hdr)
+{
+ return compat_mode_supported &&
+ hdr->e_machine == EM_RISCV &&
+ hdr->e_ident[EI_CLASS] == ELFCLASS32;
+}
+
+static int __init compat_mode_detect(void)
+{
+ unsigned long tmp = csr_read(CSR_STATUS);
+
+ csr_write(CSR_STATUS, (tmp & ~SR_UXL) | SR_UXL_32);
+ compat_mode_supported =
+ (csr_read(CSR_STATUS) & SR_UXL) == SR_UXL_32;
+
+ csr_write(CSR_STATUS, tmp);
+
+ pr_info("riscv: ELF compat mode %s",
+ compat_mode_supported ? "supported" : "failed");
+
+ return 0;
+}
+early_initcall(compat_mode_detect);
+#endif
void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
regs->status = SR_PIE;
- if (has_fpu) {
+ if (has_fpu()) {
regs->status |= SR_FS_INITIAL;
/*
* Restore the initial value to the FP register
@@ -78,7 +126,15 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
}
regs->epc = pc;
regs->sp = sp;
- set_fs(USER_DS);
+
+#ifdef CONFIG_64BIT
+ regs->status &= ~SR_UXL;
+
+ if (is_compat_task())
+ regs->status |= SR_UXL_32;
+ else
+ regs->status |= SR_UXL_64;
+#endif
}
void flush_thread(void)
@@ -101,13 +157,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
-int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
- unsigned long arg, struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
/* p->thread holds context to be restored by __switch_to() */
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gp = gp_in_global;
@@ -115,8 +173,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
childregs->status = SR_PP | SR_PIE;
p->thread.ra = (unsigned long)ret_from_kernel_thread;
- p->thread.s[0] = usp; /* fn */
- p->thread.s[1] = arg;
+ p->thread.s[0] = (unsigned long)args->fn;
+ p->thread.s[1] = (unsigned long)args->fn_arg;
} else {
*childregs = *(current_pt_regs());
if (usp) /* User fork */
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 444dc7b0fd78..2ae8280ae475 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -10,13 +10,14 @@
#include <asm/ptrace.h>
#include <asm/syscall.h>
#include <asm/thread_info.h>
+#include <asm/switch_to.h>
#include <linux/audit.h>
+#include <linux/compat.h>
#include <linux/ptrace.h>
#include <linux/elf.h>
#include <linux/regset.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
-#include <linux/tracehook.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -30,13 +31,10 @@ enum riscv_regset {
static int riscv_gpr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- struct pt_regs *regs;
-
- regs = task_pt_regs(target);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
+ return membuf_write(&to, task_pt_regs(target),
+ sizeof(struct user_regs_struct));
}
static int riscv_gpr_set(struct task_struct *target,
@@ -44,32 +42,25 @@ static int riscv_gpr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
struct pt_regs *regs;
regs = task_pt_regs(target);
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
}
#ifdef CONFIG_FPU
static int riscv_fpr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- int ret;
struct __riscv_d_ext_state *fstate = &target->thread.fstate;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0,
- offsetof(struct __riscv_d_ext_state, fcsr));
- if (!ret) {
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0,
- offsetof(struct __riscv_d_ext_state, fcsr) +
- sizeof(fstate->fcsr));
- }
+ if (target == current)
+ fstate_save(current, task_pt_regs(current));
- return ret;
+ membuf_write(&to, fstate, offsetof(struct __riscv_d_ext_state, fcsr));
+ membuf_store(&to, fstate->fcsr);
+ return membuf_zero(&to, 4); // explicitly pad
}
static int riscv_fpr_set(struct task_struct *target,
@@ -98,8 +89,8 @@ static const struct user_regset riscv_user_regset[] = {
.n = ELF_NGREG,
.size = sizeof(elf_greg_t),
.align = sizeof(elf_greg_t),
- .get = &riscv_gpr_get,
- .set = &riscv_gpr_set,
+ .regset_get = riscv_gpr_get,
+ .set = riscv_gpr_set,
},
#ifdef CONFIG_FPU
[REGSET_F] = {
@@ -107,8 +98,8 @@ static const struct user_regset riscv_user_regset[] = {
.n = ELF_NFPREG,
.size = sizeof(elf_fpreg_t),
.align = sizeof(elf_fpreg_t),
- .get = &riscv_fpr_get,
- .set = &riscv_fpr_set,
+ .regset_get = riscv_fpr_get,
+ .set = riscv_fpr_set,
},
#endif
};
@@ -120,9 +111,103 @@ static const struct user_regset_view riscv_user_native_view = {
.n = ARRAY_SIZE(riscv_user_regset),
};
-const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+ REG_OFFSET_NAME(epc),
+ REG_OFFSET_NAME(ra),
+ REG_OFFSET_NAME(sp),
+ REG_OFFSET_NAME(gp),
+ REG_OFFSET_NAME(tp),
+ REG_OFFSET_NAME(t0),
+ REG_OFFSET_NAME(t1),
+ REG_OFFSET_NAME(t2),
+ REG_OFFSET_NAME(s0),
+ REG_OFFSET_NAME(s1),
+ REG_OFFSET_NAME(a0),
+ REG_OFFSET_NAME(a1),
+ REG_OFFSET_NAME(a2),
+ REG_OFFSET_NAME(a3),
+ REG_OFFSET_NAME(a4),
+ REG_OFFSET_NAME(a5),
+ REG_OFFSET_NAME(a6),
+ REG_OFFSET_NAME(a7),
+ REG_OFFSET_NAME(s2),
+ REG_OFFSET_NAME(s3),
+ REG_OFFSET_NAME(s4),
+ REG_OFFSET_NAME(s5),
+ REG_OFFSET_NAME(s6),
+ REG_OFFSET_NAME(s7),
+ REG_OFFSET_NAME(s8),
+ REG_OFFSET_NAME(s9),
+ REG_OFFSET_NAME(s10),
+ REG_OFFSET_NAME(s11),
+ REG_OFFSET_NAME(t3),
+ REG_OFFSET_NAME(t4),
+ REG_OFFSET_NAME(t5),
+ REG_OFFSET_NAME(t6),
+ REG_OFFSET_NAME(status),
+ REG_OFFSET_NAME(badaddr),
+ REG_OFFSET_NAME(cause),
+ REG_OFFSET_NAME(orig_a0),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
{
- return &riscv_user_native_view;
+ const struct pt_regs_offset *roff;
+
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @addr: address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ return (addr & ~(THREAD_SIZE - 1)) ==
+ (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+ addr += n;
+ if (regs_within_kernel_stack(regs, (unsigned long)addr))
+ return *addr;
+ else
+ return 0;
}
void ptrace_disable(struct task_struct *child)
@@ -151,7 +236,7 @@ long arch_ptrace(struct task_struct *child, long request,
__visible int do_syscall_trace_enter(struct pt_regs *regs)
{
if (test_thread_flag(TIF_SYSCALL_TRACE))
- if (tracehook_report_syscall_entry(regs))
+ if (ptrace_report_syscall_entry(regs))
return -1;
/*
@@ -176,10 +261,91 @@ __visible void do_syscall_trace_exit(struct pt_regs *regs)
audit_syscall_exit(regs);
if (test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, 0);
+ ptrace_report_syscall_exit(regs, 0);
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_exit(regs, regs_return_value(regs));
#endif
}
+
+#ifdef CONFIG_COMPAT
+static int compat_riscv_gpr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct compat_user_regs_struct cregs;
+
+ regs_to_cregs(&cregs, task_pt_regs(target));
+
+ return membuf_write(&to, &cregs,
+ sizeof(struct compat_user_regs_struct));
+}
+
+static int compat_riscv_gpr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ struct compat_user_regs_struct cregs;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &cregs, 0, -1);
+
+ cregs_to_regs(&cregs, task_pt_regs(target));
+
+ return ret;
+}
+
+static const struct user_regset compat_riscv_user_regset[] = {
+ [REGSET_X] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = ELF_NGREG,
+ .size = sizeof(compat_elf_greg_t),
+ .align = sizeof(compat_elf_greg_t),
+ .regset_get = compat_riscv_gpr_get,
+ .set = compat_riscv_gpr_set,
+ },
+#ifdef CONFIG_FPU
+ [REGSET_F] = {
+ .core_note_type = NT_PRFPREG,
+ .n = ELF_NFPREG,
+ .size = sizeof(elf_fpreg_t),
+ .align = sizeof(elf_fpreg_t),
+ .regset_get = riscv_fpr_get,
+ .set = riscv_fpr_set,
+ },
+#endif
+};
+
+static const struct user_regset_view compat_riscv_user_native_view = {
+ .name = "riscv",
+ .e_machine = EM_RISCV,
+ .regsets = compat_riscv_user_regset,
+ .n = ARRAY_SIZE(compat_riscv_user_regset),
+};
+
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ compat_ulong_t caddr, compat_ulong_t cdata)
+{
+ long ret = -EIO;
+
+ switch (request) {
+ default:
+ ret = compat_ptrace_request(child, request, caddr, cdata);
+ break;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_COMPAT */
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_32BIT))
+ return &compat_riscv_user_native_view;
+ else
+#endif
+ return &riscv_user_native_view;
+}
diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
index ee5878d968cc..912288572226 100644
--- a/arch/riscv/kernel/reset.c
+++ b/arch/riscv/kernel/reset.c
@@ -12,7 +12,7 @@ static void default_power_off(void)
wait_for_interrupt();
}
-void (*pm_power_off)(void) = default_power_off;
+void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
void machine_restart(char *cmd)
@@ -23,10 +23,12 @@ void machine_restart(char *cmd)
void machine_halt(void)
{
- pm_power_off();
+ do_kernel_power_off();
+ default_power_off();
}
void machine_power_off(void)
{
- pm_power_off();
+ do_kernel_power_off();
+ default_power_off();
}
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
index 450492e1cb4e..5ab1c7e1a6ed 100644
--- a/arch/riscv/kernel/riscv_ksyms.c
+++ b/arch/riscv/kernel/riscv_ksyms.c
@@ -11,5 +11,7 @@
*/
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(__memset);
EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(__memmove);
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index f383ef5672b2..775d3322b422 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -5,20 +5,22 @@
* Copyright (c) 2020 Western Digital Corporation or its affiliates.
*/
+#include <linux/bits.h>
#include <linux/init.h>
#include <linux/pm.h>
+#include <linux/reboot.h>
#include <asm/sbi.h>
#include <asm/smp.h>
/* default SBI version is 0.1 */
-unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
+unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
EXPORT_SYMBOL(sbi_spec_version);
-static void (*__sbi_set_timer)(uint64_t stime);
-static int (*__sbi_send_ipi)(const unsigned long *hart_mask);
-static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask,
+static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init;
+static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init;
+static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
- unsigned long arg4, unsigned long arg5);
+ unsigned long arg4, unsigned long arg5) __ro_after_init;
struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
unsigned long arg1, unsigned long arg2,
@@ -66,6 +68,30 @@ int sbi_err_map_linux_errno(int err)
EXPORT_SYMBOL(sbi_err_map_linux_errno);
#ifdef CONFIG_RISCV_SBI_V01
+static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask)
+{
+ unsigned long cpuid, hartid;
+ unsigned long hmask = 0;
+
+ /*
+ * There is no maximum hartid concept in RISC-V and NR_CPUS must not be
+ * associated with hartid. As SBI v0.1 is only kept for backward compatibility
+ * and will be removed in the future, there is no point in supporting hartid
+ * greater than BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2
+ * should be used for platforms with hartid greater than BITS_PER_LONG.
+ */
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hartid >= BITS_PER_LONG) {
+ pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n");
+ break;
+ }
+ hmask |= BIT(hartid);
+ }
+
+ return hmask;
+}
+
/**
* sbi_console_putchar() - Writes given character to the console device.
* @ch: The data to be written to the console.
@@ -116,7 +142,7 @@ void sbi_clear_ipi(void)
EXPORT_SYMBOL(sbi_clear_ipi);
/**
- * sbi_set_timer_v01() - Program the timer for next timer event.
+ * __sbi_set_timer_v01() - Program the timer for next timer event.
* @stime_value: The value after which next timer event should fire.
*
* Return: None
@@ -131,33 +157,44 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
#endif
}
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
- sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask,
+ unsigned long hart_mask;
+
+ if (!cpu_mask || cpumask_empty(cpu_mask))
+ cpu_mask = cpu_online_mask;
+ hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
+
+ sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask),
0, 0, 0, 0, 0);
return 0;
}
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
int result = 0;
+ unsigned long hart_mask;
+
+ if (!cpu_mask || cpumask_empty(cpu_mask))
+ cpu_mask = cpu_online_mask;
+ hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
/* v0.2 function IDs are equivalent to v0.1 extension IDs */
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0,
- (unsigned long)hart_mask, 0, 0, 0, 0, 0);
+ (unsigned long)&hart_mask, 0, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0,
- (unsigned long)hart_mask, start, size,
+ (unsigned long)&hart_mask, start, size,
0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0,
- (unsigned long)hart_mask, start, size,
+ (unsigned long)&hart_mask, start, size,
arg4, 0, 0);
break;
default:
@@ -179,7 +216,7 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
sbi_major_version(), sbi_minor_version());
}
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
pr_warn("IPI extension is not available in SBI v%lu.%lu\n",
sbi_major_version(), sbi_minor_version());
@@ -187,7 +224,7 @@ static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
return 0;
}
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
@@ -211,37 +248,44 @@ static void __sbi_set_timer_v02(uint64_t stime_value)
#endif
}
-static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask)
{
- unsigned long hartid, hmask_val, hbase;
- struct cpumask tmask;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
struct sbiret ret = {0};
int result;
- if (!hart_mask || !(*hart_mask)) {
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
- hart_mask = cpumask_bits(&tmask);
- }
-
- hmask_val = 0;
- hbase = 0;
- for_each_set_bit(hartid, hart_mask, NR_CPUS) {
- if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
- ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask_val, hbase, 0, 0, 0, 0);
- if (ret.error)
- goto ecall_failed;
- hmask_val = 0;
- hbase = 0;
+ if (!cpu_mask || cpumask_empty(cpu_mask))
+ cpu_mask = cpu_online_mask;
+
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hmask) {
+ if (hartid + BITS_PER_LONG <= htop ||
+ hbase + BITS_PER_LONG <= hartid) {
+ ret = sbi_ecall(SBI_EXT_IPI,
+ SBI_EXT_IPI_SEND_IPI, hmask,
+ hbase, 0, 0, 0, 0);
+ if (ret.error)
+ goto ecall_failed;
+ hmask = 0;
+ } else if (hartid < hbase) {
+ /* shift the mask to fit lower hartid */
+ hmask <<= hbase - hartid;
+ hbase = hartid;
+ }
}
- if (!hmask_val)
+ if (!hmask) {
hbase = hartid;
- hmask_val |= 1UL << (hartid - hbase);
+ htop = hartid;
+ } else if (hartid > htop) {
+ htop = hartid;
+ }
+ hmask |= BIT(hartid - hbase);
}
- if (hmask_val) {
+ if (hmask) {
ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask_val, hbase, 0, 0, 0, 0);
+ hmask, hbase, 0, 0, 0, 0);
if (ret.error)
goto ecall_failed;
}
@@ -251,11 +295,11 @@ static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
ecall_failed:
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
- __func__, hbase, hmask_val, result);
+ __func__, hbase, hmask, result);
return result;
}
-static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
+static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask,
unsigned long hbase, unsigned long start,
unsigned long size, unsigned long arg4,
unsigned long arg5)
@@ -266,31 +310,31 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0);
+ ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
default:
@@ -302,43 +346,49 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
if (ret.error) {
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
- __func__, hbase, hmask_val, result);
+ __func__, hbase, hmask, result);
}
return result;
}
-static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
- unsigned long hmask_val, hartid, hbase;
- struct cpumask tmask;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
int result;
- if (!hart_mask || !(*hart_mask)) {
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
- hart_mask = cpumask_bits(&tmask);
- }
-
- hmask_val = 0;
- hbase = 0;
- for_each_set_bit(hartid, hart_mask, NR_CPUS) {
- if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
- result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
- start, size, arg4, arg5);
- if (result)
- return result;
- hmask_val = 0;
- hbase = 0;
+ if (!cpu_mask || cpumask_empty(cpu_mask))
+ cpu_mask = cpu_online_mask;
+
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hmask) {
+ if (hartid + BITS_PER_LONG <= htop ||
+ hbase + BITS_PER_LONG <= hartid) {
+ result = __sbi_rfence_v02_call(fid, hmask,
+ hbase, start, size, arg4, arg5);
+ if (result)
+ return result;
+ hmask = 0;
+ } else if (hartid < hbase) {
+ /* shift the mask to fit lower hartid */
+ hmask <<= hbase - hartid;
+ hbase = hartid;
+ }
}
- if (!hmask_val)
+ if (!hmask) {
hbase = hartid;
- hmask_val |= 1UL << (hartid - hbase);
+ htop = hartid;
+ } else if (hartid > htop) {
+ htop = hartid;
+ }
+ hmask |= BIT(hartid - hbase);
}
- if (hmask_val) {
- result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+ if (hmask) {
+ result = __sbi_rfence_v02_call(fid, hmask, hbase,
start, size, arg4, arg5);
if (result)
return result;
@@ -351,7 +401,7 @@ static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
* sbi_set_timer() - Program the timer for next timer event.
* @stime_value: The value after which next timer event should fire.
*
- * Return: None
+ * Return: None.
*/
void sbi_set_timer(uint64_t stime_value)
{
@@ -360,44 +410,44 @@ void sbi_set_timer(uint64_t stime_value)
/**
* sbi_send_ipi() - Send an IPI to any hart.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
*
- * Return: None
+ * Return: 0 on success, appropriate linux error code otherwise.
*/
-void sbi_send_ipi(const unsigned long *hart_mask)
+int sbi_send_ipi(const struct cpumask *cpu_mask)
{
- __sbi_send_ipi(hart_mask);
+ return __sbi_send_ipi(cpu_mask);
}
EXPORT_SYMBOL(sbi_send_ipi);
/**
* sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
*
- * Return: None
+ * Return: 0 on success, appropriate linux error code otherwise.
*/
-void sbi_remote_fence_i(const unsigned long *hart_mask)
+int sbi_remote_fence_i(const struct cpumask *cpu_mask)
{
- __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
- hart_mask, 0, 0, 0, 0);
+ return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
+ cpu_mask, 0, 0, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_fence_i);
/**
* sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
* harts for the specified virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
*
- * Return: None
+ * Return: 0 on success, appropriate linux error code otherwise.
*/
-void sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
- __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
- hart_mask, start, size, 0, 0);
+ return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma);
@@ -405,38 +455,38 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma);
* sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
* remote harts for a virtual address range belonging to a specific ASID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
* @asid: The value of address space identifier (ASID).
*
- * Return: None
+ * Return: 0 on success, appropriate linux error code otherwise.
*/
-void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
- __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
- hart_mask, start, size, asid, 0);
+ return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
+ cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
/**
* sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote
* harts for the specified guest physical address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
*
* Return: None
*/
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
@@ -444,38 +494,38 @@ EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
* sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given
* remote harts for a guest physical address range belonging to a specific VMID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
* @vmid: The value of guest ID (VMID).
*
* Return: 0 if success, Error otherwise.
*/
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
- hart_mask, start, size, vmid, 0);
+ cpu_mask, start, size, vmid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid);
/**
* sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote
* harts for the current guest virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
*
* Return: None
*/
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma);
@@ -484,23 +534,49 @@ EXPORT_SYMBOL(sbi_remote_hfence_vvma);
* remote harts for current guest virtual address range belonging to a specific
* ASID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
* @asid: The value of address space identifier (ASID).
*
* Return: None
*/
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
- hart_mask, start, size, asid, 0);
+ cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid);
+static void sbi_srst_reset(unsigned long type, unsigned long reason)
+{
+ sbi_ecall(SBI_EXT_SRST, SBI_EXT_SRST_RESET, type, reason,
+ 0, 0, 0, 0);
+ pr_warn("%s: type=0x%lx reason=0x%lx failed\n",
+ __func__, type, reason);
+}
+
+static int sbi_srst_reboot(struct notifier_block *this,
+ unsigned long mode, void *cmd)
+{
+ sbi_srst_reset((mode == REBOOT_WARM || mode == REBOOT_SOFT) ?
+ SBI_SRST_RESET_TYPE_WARM_REBOOT :
+ SBI_SRST_RESET_TYPE_COLD_REBOOT,
+ SBI_SRST_RESET_REASON_NONE);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block sbi_srst_reboot_nb;
+
+static void sbi_srst_power_off(void)
+{
+ sbi_srst_reset(SBI_SRST_RESET_TYPE_SHUTDOWN,
+ SBI_SRST_RESET_REASON_NONE);
+}
+
/**
* sbi_probe_extension() - Check if an SBI extension ID is supported or not.
* @extid: The extension ID to be probed.
@@ -547,8 +623,31 @@ static inline long sbi_get_firmware_version(void)
return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_VERSION);
}
+long sbi_get_mvendorid(void)
+{
+ return __sbi_base_ecall(SBI_EXT_BASE_GET_MVENDORID);
+}
+
+long sbi_get_marchid(void)
+{
+ return __sbi_base_ecall(SBI_EXT_BASE_GET_MARCHID);
+}
+
+long sbi_get_mimpid(void)
+{
+ return __sbi_base_ecall(SBI_EXT_BASE_GET_MIMPID);
+}
-int __init sbi_init(void)
+static void sbi_send_cpumask_ipi(const struct cpumask *target)
+{
+ sbi_send_ipi(target);
+}
+
+static const struct riscv_ipi_ops sbi_ipi_ops = {
+ .ipi_inject = sbi_send_cpumask_ipi
+};
+
+void __init sbi_init(void)
{
int ret;
@@ -565,27 +664,35 @@ int __init sbi_init(void)
sbi_get_firmware_id(), sbi_get_firmware_version());
if (sbi_probe_extension(SBI_EXT_TIME) > 0) {
__sbi_set_timer = __sbi_set_timer_v02;
- pr_info("SBI v0.2 TIME extension detected\n");
+ pr_info("SBI TIME extension detected\n");
} else {
__sbi_set_timer = __sbi_set_timer_v01;
}
if (sbi_probe_extension(SBI_EXT_IPI) > 0) {
__sbi_send_ipi = __sbi_send_ipi_v02;
- pr_info("SBI v0.2 IPI extension detected\n");
+ pr_info("SBI IPI extension detected\n");
} else {
__sbi_send_ipi = __sbi_send_ipi_v01;
}
if (sbi_probe_extension(SBI_EXT_RFENCE) > 0) {
__sbi_rfence = __sbi_rfence_v02;
- pr_info("SBI v0.2 RFENCE extension detected\n");
+ pr_info("SBI RFENCE extension detected\n");
} else {
__sbi_rfence = __sbi_rfence_v01;
}
+ if ((sbi_spec_version >= sbi_mk_version(0, 3)) &&
+ (sbi_probe_extension(SBI_EXT_SRST) > 0)) {
+ pr_info("SBI SRST extension detected\n");
+ pm_power_off = sbi_srst_power_off;
+ sbi_srst_reboot_nb.notifier_call = sbi_srst_reboot;
+ sbi_srst_reboot_nb.priority = 192;
+ register_restart_handler(&sbi_srst_reboot_nb);
+ }
} else {
__sbi_set_timer = __sbi_set_timer_v01;
__sbi_send_ipi = __sbi_send_ipi_v01;
__sbi_rfence = __sbi_rfence_v01;
}
- return 0;
+ riscv_set_ipi_ops(&sbi_ipi_ops);
}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index f04373be54a6..f0f36a4a0e9b 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -4,6 +4,8 @@
* Chen Liqin <liqin.chen@sunplusct.com>
* Lennox Wu <lennox.wu@sunplusct.com>
* Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2020 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
*/
#include <linux/init.h>
@@ -15,22 +17,27 @@
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/sched/task.h>
-#include <linux/swiotlb.h>
#include <linux/smp.h>
+#include <linux/efi.h>
+#include <linux/crash_dump.h>
-#include <asm/clint.h>
+#include <asm/alternative.h>
#include <asm/cpu_ops.h>
+#include <asm/early_ioremap.h>
+#include <asm/pgtable.h>
#include <asm/setup.h>
+#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/thread_info.h>
#include <asm/kasan.h>
+#include <asm/efi.h>
#include "head.h"
-#ifdef CONFIG_DUMMY_CONSOLE
-struct screen_info screen_info = {
+#if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI)
+struct screen_info screen_info __section(".data") = {
.orig_video_lines = 30,
.orig_video_cols = 80,
.orig_video_mode = 0,
@@ -45,72 +52,277 @@ struct screen_info screen_info = {
* This is used before the kernel initializes the BSS so it can't be in the
* BSS.
*/
-atomic_t hart_lottery __section(.sdata);
+atomic_t hart_lottery __section(".sdata")
+#ifdef CONFIG_XIP_KERNEL
+= ATOMIC_INIT(0xC001BEEF)
+#endif
+;
unsigned long boot_cpu_hartid;
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-void __init parse_dtb(void)
+/*
+ * Place kernel memory regions on the resource tree so that
+ * kexec-tools can retrieve them from /proc/iomem. While there
+ * also add "System RAM" regions for compatibility with other
+ * archs, and the rest of the known regions for completeness.
+ */
+static struct resource kimage_res = { .name = "Kernel image", };
+static struct resource code_res = { .name = "Kernel code", };
+static struct resource data_res = { .name = "Kernel data", };
+static struct resource rodata_res = { .name = "Kernel rodata", };
+static struct resource bss_res = { .name = "Kernel bss", };
+#ifdef CONFIG_CRASH_DUMP
+static struct resource elfcorehdr_res = { .name = "ELF Core hdr", };
+#endif
+
+static int __init add_resource(struct resource *parent,
+ struct resource *res)
+{
+ int ret = 0;
+
+ ret = insert_resource(parent, res);
+ if (ret < 0) {
+ pr_err("Failed to add a %s resource at %llx\n",
+ res->name, (unsigned long long) res->start);
+ return ret;
+ }
+
+ return 1;
+}
+
+static int __init add_kernel_resources(void)
+{
+ int ret = 0;
+
+ /*
+ * The memory region of the kernel image is continuous and
+ * was reserved on setup_bootmem, register it here as a
+ * resource, with the various segments of the image as
+ * child nodes.
+ */
+
+ code_res.start = __pa_symbol(_text);
+ code_res.end = __pa_symbol(_etext) - 1;
+ code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+ rodata_res.start = __pa_symbol(__start_rodata);
+ rodata_res.end = __pa_symbol(__end_rodata) - 1;
+ rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+ data_res.start = __pa_symbol(_data);
+ data_res.end = __pa_symbol(_edata) - 1;
+ data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+ bss_res.start = __pa_symbol(__bss_start);
+ bss_res.end = __pa_symbol(__bss_stop) - 1;
+ bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+ kimage_res.start = code_res.start;
+ kimage_res.end = bss_res.end;
+ kimage_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+ ret = add_resource(&iomem_resource, &kimage_res);
+ if (ret < 0)
+ return ret;
+
+ ret = add_resource(&kimage_res, &code_res);
+ if (ret < 0)
+ return ret;
+
+ ret = add_resource(&kimage_res, &rodata_res);
+ if (ret < 0)
+ return ret;
+
+ ret = add_resource(&kimage_res, &data_res);
+ if (ret < 0)
+ return ret;
+
+ ret = add_resource(&kimage_res, &bss_res);
+
+ return ret;
+}
+
+static void __init init_resources(void)
+{
+ struct memblock_region *region = NULL;
+ struct resource *res = NULL;
+ struct resource *mem_res = NULL;
+ size_t mem_res_sz = 0;
+ int num_resources = 0, res_idx = 0;
+ int ret = 0;
+
+ /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */
+ num_resources = memblock.memory.cnt + memblock.reserved.cnt + 1;
+ res_idx = num_resources - 1;
+
+ mem_res_sz = num_resources * sizeof(*mem_res);
+ mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES);
+ if (!mem_res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz);
+
+ /*
+ * Start by adding the reserved regions, if they overlap
+ * with /memory regions, insert_resource later on will take
+ * care of it.
+ */
+ ret = add_kernel_resources();
+ if (ret < 0)
+ goto error;
+
+#ifdef CONFIG_KEXEC_CORE
+ if (crashk_res.start != crashk_res.end) {
+ ret = add_resource(&iomem_resource, &crashk_res);
+ if (ret < 0)
+ goto error;
+ }
+#endif
+
+#ifdef CONFIG_CRASH_DUMP
+ if (elfcorehdr_size > 0) {
+ elfcorehdr_res.start = elfcorehdr_addr;
+ elfcorehdr_res.end = elfcorehdr_addr + elfcorehdr_size - 1;
+ elfcorehdr_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ add_resource(&iomem_resource, &elfcorehdr_res);
+ }
+#endif
+
+ for_each_reserved_mem_region(region) {
+ res = &mem_res[res_idx--];
+
+ res->name = "Reserved";
+ res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE;
+ res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1;
+
+ /*
+ * Ignore any other reserved regions within
+ * system memory.
+ */
+ if (memblock_is_memory(res->start)) {
+ /* Re-use this pre-allocated resource */
+ res_idx++;
+ continue;
+ }
+
+ ret = add_resource(&iomem_resource, res);
+ if (ret < 0)
+ goto error;
+ }
+
+ /* Add /memory regions to the resource tree */
+ for_each_mem_region(region) {
+ res = &mem_res[res_idx--];
+
+ if (unlikely(memblock_is_nomap(region))) {
+ res->name = "Reserved";
+ res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE;
+ } else {
+ res->name = "System RAM";
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ }
+
+ res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+
+ ret = add_resource(&iomem_resource, res);
+ if (ret < 0)
+ goto error;
+ }
+
+ /* Clean-up any unused pre-allocated resources */
+ if (res_idx >= 0)
+ memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res));
+ return;
+
+ error:
+ /* Better an empty resource tree than an inconsistent one */
+ release_child_resources(&iomem_resource);
+ memblock_free(mem_res, mem_res_sz);
+}
+
+
+static void __init parse_dtb(void)
{
- if (early_init_dt_scan(dtb_early_va))
+ /* Early scan of device tree from init memory */
+ if (early_init_dt_scan(dtb_early_va)) {
+ const char *name = of_flat_dt_get_machine_name();
+
+ if (name) {
+ pr_info("Machine model: %s\n", name);
+ dump_stack_set_arch_desc("%s (DT)", name);
+ }
return;
+ }
pr_err("No DTB passed to the kernel\n");
#ifdef CONFIG_CMDLINE_FORCE
- strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
pr_info("Forcing kernel command line to: %s\n", boot_command_line);
#endif
}
void __init setup_arch(char **cmdline_p)
{
- init_mm.start_code = (unsigned long) _stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) _end;
+ parse_dtb();
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
*cmdline_p = boot_command_line;
+ early_ioremap_setup();
+ jump_label_init();
parse_early_param();
- setup_bootmem();
+ efi_init();
paging_init();
#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
#else
- unflatten_device_tree();
+ if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa))))
+ unflatten_device_tree();
+ else
+ pr_err("No DTB found in kernel mappings\n");
#endif
- clint_init_boot_cpu();
+ misc_mem_init();
-#ifdef CONFIG_SWIOTLB
- swiotlb_init(1);
-#endif
+ init_resources();
+ sbi_init();
#ifdef CONFIG_KASAN
kasan_init();
#endif
-#if IS_ENABLED(CONFIG_RISCV_SBI)
- sbi_init();
-#endif
-
#ifdef CONFIG_SMP
setup_smp();
#endif
riscv_fill_hwcap();
+ apply_boot_alternatives();
}
static int __init topology_init(void)
{
- int i;
+ int i, ret;
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_devices, i);
cpu->hotpluggable = cpu_has_hotplug(i);
- register_cpu(cpu, i);
+ ret = register_cpu(cpu, i);
+ if (unlikely(ret))
+ pr_warn("Warning: %s: register_cpu %d failed (%d)\n",
+ __func__, i, ret);
}
return 0;
}
subsys_initcall(topology_init);
+
+void free_initmem(void)
+{
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end),
+ IS_ENABLED(CONFIG_64BIT) ?
+ set_memory_rw : set_memory_rw_nx);
+
+ free_initmem_default(POISON_FREE_INITMEM);
+}
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 17ba190e84a5..38b05ca6fe66 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -6,14 +6,16 @@
* Copyright (C) 2012 Regents of the University of California
*/
+#include <linux/compat.h>
#include <linux/signal.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
-#include <linux/tracehook.h>
+#include <linux/resume_user_mode.h>
#include <linux/linkage.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
+#include <asm/signal32.h>
#include <asm/switch_to.h>
#include <asm/csr.h>
@@ -90,7 +92,7 @@ static long restore_sigcontext(struct pt_regs *regs,
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs));
/* Restore the floating-point state. */
- if (has_fpu)
+ if (has_fpu())
err |= restore_fp_state(regs, &sc->sc_fpregs);
return err;
}
@@ -143,7 +145,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
/* Save the floating-point state. */
- if (has_fpu)
+ if (has_fpu())
err |= save_fp_state(regs, &sc->sc_fpregs);
return err;
}
@@ -250,7 +252,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
regs->a0 = -EINTR;
break;
}
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOINTR:
regs->a0 = regs->orig_a0;
regs->epc -= 0x4;
@@ -258,8 +260,13 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
}
}
+ rseq_signal_deliver(ksig, regs);
+
/* Set up the stack frame */
- ret = setup_rt_frame(ksig, oldset, regs);
+ if (is_compat_task())
+ ret = compat_setup_rt_frame(ksig, oldset, regs);
+ else
+ ret = setup_rt_frame(ksig, oldset, regs);
signal_setup_done(ret, ksig, 0);
}
@@ -309,12 +316,13 @@ static void do_signal(struct pt_regs *regs)
asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
unsigned long thread_info_flags)
{
+ if (thread_info_flags & _TIF_UPROBE)
+ uprobe_notify_resume(regs);
+
/* Handle pending signal delivery */
- if (thread_info_flags & _TIF_SIGPENDING)
+ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
do_signal(regs);
- if (thread_info_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- }
+ if (thread_info_flags & _TIF_NOTIFY_RESUME)
+ resume_user_mode_work(regs);
}
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index b1d4f452f843..b5d30ea92292 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -9,6 +9,7 @@
*/
#include <linux/cpu.h>
+#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/profile.h>
@@ -16,8 +17,8 @@
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/delay.h>
+#include <linux/irq_work.h>
-#include <asm/clint.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
@@ -26,10 +27,12 @@ enum ipi_message_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
+ IPI_IRQ_WORK,
+ IPI_TIMER,
IPI_MAX
};
-unsigned long __cpuid_to_hartid_map[NR_CPUS] = {
+unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = {
[0 ... NR_CPUS-1] = INVALID_HARTID
};
@@ -53,19 +56,9 @@ int riscv_hartid_to_cpuid(int hartid)
return i;
pr_err("Couldn't find cpu id for hartid [%d]\n", hartid);
- return i;
+ return -ENOENT;
}
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
-{
- int cpu;
-
- cpumask_clear(out);
- for_each_cpu(cpu, in)
- cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
-}
-EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
-
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
return phys_id == cpuid_to_hartid_map(cpu);
@@ -84,9 +77,25 @@ static void ipi_stop(void)
wait_for_interrupt();
}
+static const struct riscv_ipi_ops *ipi_ops __ro_after_init;
+
+void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
+{
+ ipi_ops = ops;
+}
+EXPORT_SYMBOL_GPL(riscv_set_ipi_ops);
+
+void riscv_clear_ipi(void)
+{
+ if (ipi_ops && ipi_ops->ipi_clear)
+ ipi_ops->ipi_clear();
+
+ csr_clear(CSR_IP, IE_SIE);
+}
+EXPORT_SYMBOL_GPL(riscv_clear_ipi);
+
static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op)
{
- struct cpumask hartid_mask;
int cpu;
smp_mb__before_atomic();
@@ -94,44 +103,37 @@ static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op)
set_bit(op, &ipi_data[cpu].bits);
smp_mb__after_atomic();
- riscv_cpuid_to_hartid_mask(mask, &hartid_mask);
- if (IS_ENABLED(CONFIG_RISCV_SBI))
- sbi_send_ipi(cpumask_bits(&hartid_mask));
+ if (ipi_ops && ipi_ops->ipi_inject)
+ ipi_ops->ipi_inject(mask);
else
- clint_send_ipi_mask(mask);
+ pr_warn("SMP: IPI inject method not available\n");
}
static void send_ipi_single(int cpu, enum ipi_message_type op)
{
- int hartid = cpuid_to_hartid_map(cpu);
-
smp_mb__before_atomic();
set_bit(op, &ipi_data[cpu].bits);
smp_mb__after_atomic();
- if (IS_ENABLED(CONFIG_RISCV_SBI))
- sbi_send_ipi(cpumask_bits(cpumask_of(hartid)));
+ if (ipi_ops && ipi_ops->ipi_inject)
+ ipi_ops->ipi_inject(cpumask_of(cpu));
else
- clint_send_ipi_single(hartid);
+ pr_warn("SMP: IPI inject method not available\n");
}
-static inline void clear_ipi(void)
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
{
- if (IS_ENABLED(CONFIG_RISCV_SBI))
- csr_clear(CSR_IP, IE_SIE);
- else
- clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
+ send_ipi_single(smp_processor_id(), IPI_IRQ_WORK);
}
+#endif
void handle_IPI(struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
unsigned long *stats = ipi_data[smp_processor_id()].stats;
- irq_enter();
-
- clear_ipi();
+ riscv_clear_ipi();
while (true) {
unsigned long ops;
@@ -141,7 +143,7 @@ void handle_IPI(struct pt_regs *regs)
ops = xchg(pending_ipis, 0);
if (ops == 0)
- goto done;
+ return;
if (ops & (1 << IPI_RESCHEDULE)) {
stats[IPI_RESCHEDULE]++;
@@ -158,21 +160,30 @@ void handle_IPI(struct pt_regs *regs)
ipi_stop();
}
+ if (ops & (1 << IPI_IRQ_WORK)) {
+ stats[IPI_IRQ_WORK]++;
+ irq_work_run();
+ }
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+ if (ops & (1 << IPI_TIMER)) {
+ stats[IPI_TIMER]++;
+ tick_receive_broadcast();
+ }
+#endif
BUG_ON((ops >> IPI_MAX) != 0);
/* Order data access and bit testing. */
mb();
}
-
-done:
- irq_exit();
- set_irq_regs(old_regs);
}
static const char * const ipi_names[] = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
+ [IPI_IRQ_WORK] = "IRQ work interrupts",
+ [IPI_TIMER] = "Timer broadcast interrupts",
};
void show_ipi_stats(struct seq_file *p, int prec)
@@ -198,6 +209,13 @@ void arch_send_call_function_single_ipi(int cpu)
send_ipi_single(cpu, IPI_CALL_FUNC);
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void tick_broadcast(const struct cpumask *mask)
+{
+ send_ipi_mask(mask, IPI_TIMER);
+}
+#endif
+
void smp_send_stop(void)
{
unsigned long timeout;
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 4e9922790f6e..f1e4948a4b52 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -24,10 +24,10 @@
#include <linux/of.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/mm.h>
-#include <asm/clint.h>
#include <asm/cpu_ops.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
+#include <asm/numa.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/sbi.h>
@@ -46,13 +46,18 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
{
int cpuid;
int ret;
+ unsigned int curr_cpuid;
+
+ curr_cpuid = smp_processor_id();
+ numa_store_cpu_info(curr_cpuid);
+ numa_add_cpu(curr_cpuid);
/* This covers non-smp usecase mandated by "nosmp" option */
if (max_cpus == 0)
return;
for_each_possible_cpu(cpuid) {
- if (cpuid == smp_processor_id())
+ if (cpuid == curr_cpuid)
continue;
if (cpu_ops[cpuid]->cpu_prepare) {
ret = cpu_ops[cpuid]->cpu_prepare(cpuid);
@@ -60,6 +65,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
continue;
}
set_cpu_present(cpuid, true);
+ numa_store_cpu_info(cpuid);
}
}
@@ -80,15 +86,17 @@ void __init setup_smp(void)
if (hart == cpuid_to_hartid_map(0)) {
BUG_ON(found_boot_cpu);
found_boot_cpu = 1;
+ early_map_cpu_to_node(0, of_node_to_nid(dn));
continue;
}
if (cpuid >= NR_CPUS) {
pr_warn("Invalid cpuid [%d] for hartid [%d]\n",
cpuid, hart);
- break;
+ continue;
}
cpuid_to_hartid_map(cpuid) = hart;
+ early_map_cpu_to_node(cpuid, of_node_to_nid(dn));
cpuid++;
}
@@ -106,7 +114,7 @@ void __init setup_smp(void)
}
}
-int start_secondary_cpu(int cpu, struct task_struct *tidle)
+static int start_secondary_cpu(int cpu, struct task_struct *tidle)
{
if (cpu_ops[cpu]->cpu_start)
return cpu_ops[cpu]->cpu_start(cpu, tidle);
@@ -121,7 +129,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
ret = start_secondary_cpu(cpu, tidle);
if (!ret) {
- lockdep_assert_held(&cpu_running);
wait_for_completion_timeout(&cpu_running,
msecs_to_jiffies(1000));
@@ -146,18 +153,19 @@ void __init smp_cpus_done(unsigned int max_cpus)
asmlinkage __visible void smp_callin(void)
{
struct mm_struct *mm = &init_mm;
+ unsigned int curr_cpuid = smp_processor_id();
- if (!IS_ENABLED(CONFIG_RISCV_SBI))
- clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
+ riscv_clear_ipi();
/* All kernel threads share the same mm context. */
mmgrab(mm);
current->active_mm = mm;
- trap_init();
- notify_cpu_starting(smp_processor_id());
- update_siblings_masks(smp_processor_id());
- set_cpu_online(smp_processor_id(), 1);
+ notify_cpu_starting(curr_cpuid);
+ numa_add_cpu(curr_cpuid);
+ update_siblings_masks(curr_cpuid);
+ set_cpu_online(curr_cpuid, 1);
+
/*
* Remote TLB flushes are ignored while the CPU is offline, so emit
* a local TLB flush right now just in case.
@@ -168,7 +176,6 @@ asmlinkage __visible void smp_callin(void)
* Disable preemption before enabling interrupts, so we don't try to
* schedule a CPU that hasn't actually started yet.
*/
- preempt_disable();
local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
diff --git a/arch/riscv/kernel/soc.c b/arch/riscv/kernel/soc.c
index c7b0a73e382e..a0516172a33c 100644
--- a/arch/riscv/kernel/soc.c
+++ b/arch/riscv/kernel/soc.c
@@ -26,30 +26,3 @@ void __init soc_early_init(void)
}
}
}
-
-static bool soc_builtin_dtb_match(unsigned long vendor_id,
- unsigned long arch_id, unsigned long imp_id,
- const struct soc_builtin_dtb *entry)
-{
- return entry->vendor_id == vendor_id &&
- entry->arch_id == arch_id &&
- entry->imp_id == imp_id;
-}
-
-void * __init soc_lookup_builtin_dtb(void)
-{
- unsigned long vendor_id, arch_id, imp_id;
- const struct soc_builtin_dtb *s;
-
- __asm__ ("csrr %0, mvendorid" : "=r"(vendor_id));
- __asm__ ("csrr %0, marchid" : "=r"(arch_id));
- __asm__ ("csrr %0, mimpid" : "=r"(imp_id));
-
- for (s = (void *)&__soc_builtin_dtb_table_start;
- (void *)s < (void *)&__soc_builtin_dtb_table_end; s++) {
- if (soc_builtin_dtb_match(vendor_id, arch_id, imp_id, s))
- return s->dtb_func();
- }
-
- return NULL;
-}
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 595342910c3f..08d11a53f39e 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -12,28 +12,23 @@
#include <linux/stacktrace.h>
#include <linux/ftrace.h>
-register unsigned long sp_in_global __asm__("sp");
+#include <asm/stacktrace.h>
#ifdef CONFIG_FRAME_POINTER
-struct stackframe {
- unsigned long fp;
- unsigned long ra;
-};
-
void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
- bool (*fn)(unsigned long, void *), void *arg)
+ bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long fp, sp, pc;
+ int level = 0;
if (regs) {
fp = frame_pointer(regs);
sp = user_stack_pointer(regs);
pc = instruction_pointer(regs);
} else if (task == NULL || task == current) {
- const register unsigned long current_sp = sp_in_global;
fp = (unsigned long)__builtin_frame_address(0);
- sp = current_sp;
+ sp = current_stack_pointer;
pc = (unsigned long)walk_stackframe;
} else {
/* task blocked in __switch_to */
@@ -46,7 +41,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
unsigned long low, high;
struct stackframe *frame;
- if (unlikely(!__kernel_text_address(pc) || fn(pc, arg)))
+ if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc))))
break;
/* Validate frame pointer */
@@ -57,16 +52,22 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
/* Unwind stack frame */
frame = (struct stackframe *)fp - 1;
sp = fp;
- fp = frame->fp;
- pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
- (unsigned long *)(fp - 8));
+ if (regs && (regs->epc == pc) && (frame->fp & 0x7)) {
+ fp = frame->ra;
+ pc = regs->ra;
+ } else {
+ fp = frame->fp;
+ pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+ (unsigned long *)(fp - 8));
+ }
+
}
}
#else /* !CONFIG_FRAME_POINTER */
void notrace walk_stackframe(struct task_struct *task,
- struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg)
+ struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long sp, pc;
unsigned long *ksp;
@@ -75,7 +76,7 @@ void notrace walk_stackframe(struct task_struct *task,
sp = user_stack_pointer(regs);
pc = instruction_pointer(regs);
} else if (task == NULL || task == current) {
- sp = sp_in_global;
+ sp = current_stack_pointer;
pc = (unsigned long)walk_stackframe;
} else {
/* task blocked in __switch_to */
@@ -88,7 +89,7 @@ void notrace walk_stackframe(struct task_struct *task,
ksp = (unsigned long *)sp;
while (!kstack_end(ksp)) {
- if (__kernel_text_address(pc) && unlikely(fn(pc, arg)))
+ if (__kernel_text_address(pc) && unlikely(!fn(arg, pc)))
break;
pc = (*ksp++) - 0x4;
}
@@ -96,76 +97,49 @@ void notrace walk_stackframe(struct task_struct *task,
#endif /* CONFIG_FRAME_POINTER */
-
-static bool print_trace_address(unsigned long pc, void *arg)
+static bool print_trace_address(void *arg, unsigned long pc)
{
const char *loglvl = arg;
print_ip_sym(loglvl, pc);
- return false;
+ return true;
+}
+
+noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
+ const char *loglvl)
+{
+ walk_stackframe(task, regs, print_trace_address, (void *)loglvl);
}
void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
{
- pr_cont("Call Trace:\n");
- walk_stackframe(task, NULL, print_trace_address, (void *)loglvl);
+ pr_cont("%sCall Trace:\n", loglvl);
+ dump_backtrace(NULL, task, loglvl);
}
-static bool save_wchan(unsigned long pc, void *arg)
+static bool save_wchan(void *arg, unsigned long pc)
{
if (!in_sched_functions(pc)) {
unsigned long *p = arg;
*p = pc;
- return true;
+ return false;
}
- return false;
+ return true;
}
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc = 0;
- if (likely(task && task != current && task->state != TASK_RUNNING))
- walk_stackframe(task, NULL, save_wchan, &pc);
+ if (!try_get_task_stack(task))
+ return 0;
+ walk_stackframe(task, NULL, save_wchan, &pc);
+ put_task_stack(task);
return pc;
}
-
-#ifdef CONFIG_STACKTRACE
-
-static bool __save_trace(unsigned long pc, void *arg, bool nosched)
-{
- struct stack_trace *trace = arg;
-
- if (unlikely(nosched && in_sched_functions(pc)))
- return false;
- if (unlikely(trace->skip > 0)) {
- trace->skip--;
- return false;
- }
-
- trace->entries[trace->nr_entries++] = pc;
- return (trace->nr_entries >= trace->max_entries);
-}
-
-static bool save_trace(unsigned long pc, void *arg)
+noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+ struct task_struct *task, struct pt_regs *regs)
{
- return __save_trace(pc, arg, false);
+ walk_stackframe(task, regs, consume_entry, cookie);
}
-
-/*
- * Save stack-backtrace addresses into a stack_trace buffer.
- */
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
- walk_stackframe(tsk, NULL, save_trace, trace);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-
-void save_stack_trace(struct stack_trace *trace)
-{
- save_stack_trace_tsk(NULL, trace);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-
-#endif /* CONFIG_STACKTRACE */
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
new file mode 100644
index 000000000000..9ba24fb8cc93
--- /dev/null
+++ b/arch/riscv/kernel/suspend.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#include <linux/ftrace.h>
+#include <asm/csr.h>
+#include <asm/suspend.h>
+
+static void suspend_save_csrs(struct suspend_context *context)
+{
+ context->scratch = csr_read(CSR_SCRATCH);
+ context->tvec = csr_read(CSR_TVEC);
+ context->ie = csr_read(CSR_IE);
+
+ /*
+ * No need to save/restore IP CSR (i.e. MIP or SIP) because:
+ *
+ * 1. For no-MMU (M-mode) kernel, the bits in MIP are set by
+ * external devices (such as interrupt controller, timer, etc).
+ * 2. For MMU (S-mode) kernel, the bits in SIP are set by
+ * M-mode firmware and external devices (such as interrupt
+ * controller, etc).
+ */
+
+#ifdef CONFIG_MMU
+ context->satp = csr_read(CSR_SATP);
+#endif
+}
+
+static void suspend_restore_csrs(struct suspend_context *context)
+{
+ csr_write(CSR_SCRATCH, context->scratch);
+ csr_write(CSR_TVEC, context->tvec);
+ csr_write(CSR_IE, context->ie);
+
+#ifdef CONFIG_MMU
+ csr_write(CSR_SATP, context->satp);
+#endif
+}
+
+int cpu_suspend(unsigned long arg,
+ int (*finish)(unsigned long arg,
+ unsigned long entry,
+ unsigned long context))
+{
+ int rc = 0;
+ struct suspend_context context = { 0 };
+
+ /* Finisher should be non-NULL */
+ if (!finish)
+ return -EINVAL;
+
+ /* Save additional CSRs*/
+ suspend_save_csrs(&context);
+
+ /*
+ * Function graph tracer state gets incosistent when the kernel
+ * calls functions that never return (aka finishers) hence disable
+ * graph tracing during their execution.
+ */
+ pause_graph_tracing();
+
+ /* Save context on stack */
+ if (__cpu_suspend_enter(&context)) {
+ /* Call the finisher */
+ rc = finish(arg, __pa_symbol(__cpu_resume_enter),
+ (ulong)&context);
+
+ /*
+ * Should never reach here, unless the suspend finisher
+ * fails. Successful cpu_suspend() should return from
+ * __cpu_resume_entry()
+ */
+ if (!rc)
+ rc = -EOPNOTSUPP;
+ }
+
+ /* Enable function graph tracer */
+ unpause_graph_tracing();
+
+ /* Restore additional CSRs */
+ suspend_restore_csrs(&context);
+
+ return rc;
+}
diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S
new file mode 100644
index 000000000000..aafcca58c19d
--- /dev/null
+++ b/arch/riscv/kernel/suspend_entry.S
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/csr.h>
+#include <asm/xip_fixup.h>
+
+ .text
+ .altmacro
+ .option norelax
+
+ENTRY(__cpu_suspend_enter)
+ /* Save registers (except A0 and T0-T6) */
+ REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0)
+ REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0)
+ REG_S gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0)
+ REG_S tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0)
+ REG_S s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0)
+ REG_S s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0)
+ REG_S a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0)
+ REG_S a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0)
+ REG_S a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0)
+ REG_S a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0)
+ REG_S a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0)
+ REG_S a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0)
+ REG_S a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0)
+ REG_S s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0)
+ REG_S s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0)
+ REG_S s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0)
+ REG_S s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0)
+ REG_S s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0)
+ REG_S s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0)
+ REG_S s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0)
+ REG_S s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0)
+ REG_S s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0)
+ REG_S s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0)
+
+ /* Save CSRs */
+ csrr t0, CSR_EPC
+ REG_S t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0)
+ csrr t0, CSR_STATUS
+ REG_S t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0)
+ csrr t0, CSR_TVAL
+ REG_S t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0)
+ csrr t0, CSR_CAUSE
+ REG_S t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0)
+
+ /* Return non-zero value */
+ li a0, 1
+
+ /* Return to C code */
+ ret
+END(__cpu_suspend_enter)
+
+ENTRY(__cpu_resume_enter)
+ /* Load the global pointer */
+ .option push
+ .option norelax
+ la gp, __global_pointer$
+ .option pop
+
+#ifdef CONFIG_MMU
+ /* Save A0 and A1 */
+ add t0, a0, zero
+ add t1, a1, zero
+
+ /* Enable MMU */
+ la a0, swapper_pg_dir
+ XIP_FIXUP_OFFSET a0
+ call relocate_enable_mmu
+
+ /* Restore A0 and A1 */
+ add a0, t0, zero
+ add a1, t1, zero
+#endif
+
+ /* Make A0 point to suspend context */
+ add a0, a1, zero
+
+ /* Restore CSRs */
+ REG_L t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0)
+ csrw CSR_EPC, t0
+ REG_L t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0)
+ csrw CSR_STATUS, t0
+ REG_L t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0)
+ csrw CSR_TVAL, t0
+ REG_L t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0)
+ csrw CSR_CAUSE, t0
+
+ /* Restore registers (except A0 and T0-T6) */
+ REG_L ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0)
+ REG_L sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0)
+ REG_L gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0)
+ REG_L tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0)
+ REG_L s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0)
+ REG_L s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0)
+ REG_L a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0)
+ REG_L a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0)
+ REG_L a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0)
+ REG_L a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0)
+ REG_L a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0)
+ REG_L a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0)
+ REG_L a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0)
+ REG_L s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0)
+ REG_L s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0)
+ REG_L s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0)
+ REG_L s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0)
+ REG_L s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0)
+ REG_L s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0)
+ REG_L s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0)
+ REG_L s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0)
+ REG_L s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0)
+ REG_L s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0)
+
+ /* Return zero value */
+ add a0, zero, zero
+
+ /* Return to C code */
+ ret
+END(__cpu_resume_enter)
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index f3619f59d85c..9c0194f176fc 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -8,6 +8,7 @@
#include <linux/syscalls.h>
#include <asm/unistd.h>
#include <asm/cacheflush.h>
+#include <asm-generic/mman-common.h>
static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
@@ -16,6 +17,11 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
{
if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
return -EINVAL;
+
+ if ((prot & PROT_WRITE) && (prot & PROT_EXEC))
+ if (unlikely(!(prot & PROT_READ)))
+ return -EINVAL;
+
return ksys_mmap_pgoff(addr, len, prot, flags, fd,
offset >> (PAGE_SHIFT - page_shift_offset));
}
@@ -27,7 +33,9 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
{
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0);
}
-#else
+#endif
+
+#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, off_t, offset)
@@ -38,7 +46,7 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
*/
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12);
}
-#endif /* !CONFIG_64BIT */
+#endif
/*
* Allows the instruction cache to be flushed from userspace. Despite RISC-V
diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c
index f1ead9df96ca..44b1420a2270 100644
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -7,13 +7,12 @@
#include <linux/linkage.h>
#include <linux/syscalls.h>
#include <asm-generic/syscalls.h>
-#include <asm/vdso.h>
#include <asm/syscall.h>
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
-void *sys_call_table[__NR_syscalls] = {
+void * const sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = sys_ni_syscall,
#include <asm/unistd.h>
};
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index 4d3a1048ad8b..8217b0f67c6c 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -4,12 +4,14 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/of_clk.h>
#include <linux/clocksource.h>
#include <linux/delay.h>
#include <asm/sbi.h>
#include <asm/processor.h>
+#include <asm/timex.h>
-unsigned long riscv_timebase;
+unsigned long riscv_timebase __ro_after_init;
EXPORT_SYMBOL_GPL(riscv_timebase);
void __init time_init(void)
@@ -24,6 +26,8 @@ void __init time_init(void)
riscv_timebase = prop;
lpj_fine = riscv_timebase / HZ;
+
+ of_clk_init(NULL);
timer_probe();
}
diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c
new file mode 100644
index 000000000000..095ac976d7da
--- /dev/null
+++ b/arch/riscv/kernel/trace_irq.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kprobes.h>
+#include "trace_irq.h"
+
+/*
+ * trace_hardirqs_on/off require the caller to setup frame pointer properly.
+ * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel.
+ * Here we add one extra level so they can be safely called by low
+ * level entry code which $fp is used for other purpose.
+ */
+
+void __trace_hardirqs_on(void)
+{
+ trace_hardirqs_on();
+}
+NOKPROBE_SYMBOL(__trace_hardirqs_on);
+
+void __trace_hardirqs_off(void)
+{
+ trace_hardirqs_off();
+}
+NOKPROBE_SYMBOL(__trace_hardirqs_off);
diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h
new file mode 100644
index 000000000000..99fe67377e5e
--- /dev/null
+++ b/arch/riscv/kernel/trace_irq.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
+ */
+#ifndef __TRACE_IRQ_H
+#define __TRACE_IRQ_H
+
+void __trace_hardirqs_on(void);
+void __trace_hardirqs_off(void);
+
+#endif /* __TRACE_IRQ_H */
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index ecec1778e3a4..b40426509244 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -12,18 +12,19 @@
#include <linux/signal.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
+#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/irq.h>
+#include <asm/asm-prototypes.h>
+#include <asm/bug.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/csr.h>
int show_unhandled_signals = 1;
-extern asmlinkage void handle_exception(void);
-
static DEFINE_SPINLOCK(die_lock);
void die(struct pt_regs *regs, const char *str)
@@ -53,7 +54,7 @@ void die(struct pt_regs *regs, const char *str)
if (panic_on_oops)
panic("Fatal exception");
if (ret != NOTIFY_STOP)
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
@@ -66,7 +67,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
tsk->comm, task_pid_nr(tsk), signo, code, addr);
print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
- show_regs(regs);
+ __show_regs(regs);
}
force_sig_fault(signo, code, (void __user *)addr);
@@ -75,6 +76,8 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
static void do_trap_error(struct pt_regs *regs, int signo, int code,
unsigned long addr, const char *str)
{
+ current->thread.bad_cause = regs->cause;
+
if (user_mode(regs)) {
do_trap(regs, signo, code, addr);
} else {
@@ -83,8 +86,13 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code,
}
}
+#if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_RISCV_ALTERNATIVE)
+#define __trap_section __section(".xip.traps")
+#else
+#define __trap_section
+#endif
#define DO_ERROR_INFO(name, signo, code, str) \
-asmlinkage __visible void name(struct pt_regs *regs) \
+asmlinkage __visible __trap_section void name(struct pt_regs *regs) \
{ \
do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \
}
@@ -108,7 +116,7 @@ DO_ERROR_INFO(do_trap_store_misaligned,
int handle_misaligned_load(struct pt_regs *regs);
int handle_misaligned_store(struct pt_regs *regs);
-asmlinkage void do_trap_load_misaligned(struct pt_regs *regs)
+asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs)
{
if (!handle_misaligned_load(regs))
return;
@@ -116,7 +124,7 @@ asmlinkage void do_trap_load_misaligned(struct pt_regs *regs)
"Oops - load address misaligned");
}
-asmlinkage void do_trap_store_misaligned(struct pt_regs *regs)
+asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs)
{
if (!handle_misaligned_store(regs))
return;
@@ -137,14 +145,30 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
{
bug_insn_t insn;
- if (probe_kernel_address((bug_insn_t *)pc, insn))
+ if (get_kernel_nofault(insn, (bug_insn_t *)pc))
return 0;
return GET_INSN_LENGTH(insn);
}
-asmlinkage __visible void do_trap_break(struct pt_regs *regs)
+asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
{
+#ifdef CONFIG_KPROBES
+ if (kprobe_single_step_handler(regs))
+ return;
+
+ if (kprobe_breakpoint_handler(regs))
+ return;
+#endif
+#ifdef CONFIG_UPROBES
+ if (uprobe_single_step_handler(regs))
+ return;
+
+ if (uprobe_breakpoint_handler(regs))
+ return;
+#endif
+ current->thread.bad_cause = regs->cause;
+
if (user_mode(regs))
force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->epc);
#ifdef CONFIG_KGDB
@@ -157,6 +181,7 @@ asmlinkage __visible void do_trap_break(struct pt_regs *regs)
else
die(regs, "Kernel BUG");
}
+NOKPROBE_SYMBOL(do_trap_break);
#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long pc)
@@ -165,7 +190,7 @@ int is_valid_bugaddr(unsigned long pc)
if (pc < VMALLOC_START)
return 0;
- if (probe_kernel_address((bug_insn_t *)pc, insn))
+ if (get_kernel_nofault(insn, (bug_insn_t *)pc))
return 0;
if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32)
return (insn == __BUG_INSN_32);
@@ -174,13 +199,37 @@ int is_valid_bugaddr(unsigned long pc)
}
#endif /* CONFIG_GENERIC_BUG */
-void trap_init(void)
+#ifdef CONFIG_VMAP_STACK
+static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
+ overflow_stack)__aligned(16);
+/*
+ * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used
+ * to get per-cpu overflow stack(get_overflow_stack).
+ */
+long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)];
+asmlinkage unsigned long get_overflow_stack(void)
+{
+ return (unsigned long)this_cpu_ptr(overflow_stack) +
+ OVERFLOW_STACK_SIZE;
+}
+
+asmlinkage void handle_bad_stack(struct pt_regs *regs)
{
- /*
- * Set sup0 scratch register to 0, indicating to exception vector
- * that we are presently executing in the kernel
- */
- csr_write(CSR_SCRATCH, 0);
- /* Set the exception vector address */
- csr_write(CSR_TVEC, &handle_exception);
+ unsigned long tsk_stk = (unsigned long)current->stack;
+ unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
+
+ console_verbose();
+
+ pr_emerg("Insufficient stack space to handle exception!\n");
+ pr_emerg("Task stack: [0x%016lx..0x%016lx]\n",
+ tsk_stk, tsk_stk + THREAD_SIZE);
+ pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
+ ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
+
+ __show_regs(regs);
+ panic("Kernel stack overflow");
+
+ for (;;)
+ wait_for_interrupt();
}
+#endif
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 678204231700..69b05b6c181b 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -12,16 +12,33 @@
#include <linux/binfmts.h>
#include <linux/err.h>
#include <asm/page.h>
-#ifdef GENERIC_TIME_VSYSCALL
+#include <asm/vdso.h>
+#include <linux/time_namespace.h>
+
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL
#include <vdso/datapage.h>
#else
-#include <asm/vdso.h>
+struct vdso_data {
+};
#endif
extern char vdso_start[], vdso_end[];
+#ifdef CONFIG_COMPAT
+extern char compat_vdso_start[], compat_vdso_end[];
+#endif
+
+enum vvar_pages {
+ VVAR_DATA_PAGE_OFFSET,
+ VVAR_TIMENS_PAGE_OFFSET,
+ VVAR_NR_PAGES,
+};
+
+enum rv_vdso_map {
+ RV_VDSO_MAP_VVAR,
+ RV_VDSO_MAP_VDSO,
+};
-static unsigned int vdso_pages;
-static struct page **vdso_pagelist;
+#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT)
/*
* The vDSO data page.
@@ -32,80 +49,266 @@ static union {
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
-static int __init vdso_init(void)
+struct __vdso_info {
+ const char *name;
+ const char *vdso_code_start;
+ const char *vdso_code_end;
+ unsigned long vdso_pages;
+ /* Data Mapping */
+ struct vm_special_mapping *dm;
+ /* Code Mapping */
+ struct vm_special_mapping *cm;
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ current->mm->context.vdso = (void *)new_vma->vm_start;
+
+ return 0;
+}
+
+static void __init __vdso_init(struct __vdso_info *vdso_info)
{
unsigned int i;
+ struct page **vdso_pagelist;
+ unsigned long pfn;
+
+ if (memcmp(vdso_info->vdso_code_start, "\177ELF", 4))
+ panic("vDSO is not a valid ELF object!\n");
+
+ vdso_info->vdso_pages = (
+ vdso_info->vdso_code_end -
+ vdso_info->vdso_code_start) >>
+ PAGE_SHIFT;
+
+ vdso_pagelist = kcalloc(vdso_info->vdso_pages,
+ sizeof(struct page *),
+ GFP_KERNEL);
+ if (vdso_pagelist == NULL)
+ panic("vDSO kcalloc failed!\n");
- vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
- vdso_pagelist =
- kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL);
- if (unlikely(vdso_pagelist == NULL)) {
- pr_err("vdso: pagelist allocation failed\n");
- return -ENOMEM;
+ /* Grab the vDSO code pages. */
+ pfn = sym_to_pfn(vdso_info->vdso_code_start);
+
+ for (i = 0; i < vdso_info->vdso_pages; i++)
+ vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+ vdso_info->cm->pages = vdso_pagelist;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+ return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+ struct __vdso_info *vdso_info = mm->context.vdso_info;
+
+ mmap_read_lock(mm);
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long size = vma->vm_end - vma->vm_start;
+
+ if (vma_is_special_mapping(vma, vdso_info->dm))
+ zap_page_range(vma, vma->vm_start, size);
}
- for (i = 0; i < vdso_pages; i++) {
- struct page *pg;
+ mmap_read_unlock(mm);
+ return 0;
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_mm == current->mm))
+ return current->nsproxy->time_ns->vvar_page;
+
+ /*
+ * VM_PFNMAP | VM_IO protect .fault() handler from being called
+ * through interfaces like /proc/$pid/mem or
+ * process_vm_{readv,writev}() as long as there's no .access()
+ * in special_mapping_vmops.
+ * For more details check_vma_flags() and __access_remote_vm()
+ */
+ WARN(1, "vvar_page accessed remotely");
+
+ return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *timens_page = find_timens_vvar_page(vma);
+ unsigned long pfn;
- pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
- vdso_pagelist[i] = pg;
+ switch (vmf->pgoff) {
+ case VVAR_DATA_PAGE_OFFSET:
+ if (timens_page)
+ pfn = page_to_pfn(timens_page);
+ else
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#ifdef CONFIG_TIME_NS
+ case VVAR_TIMENS_PAGE_OFFSET:
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+ * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#endif /* CONFIG_TIME_NS */
+ default:
+ return VM_FAULT_SIGBUS;
}
- vdso_pagelist[i] = virt_to_page(vdso_data);
+
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
+ [RV_VDSO_MAP_VVAR] = {
+ .name = "[vvar]",
+ .fault = vvar_fault,
+ },
+ [RV_VDSO_MAP_VDSO] = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
+ },
+};
+
+static struct __vdso_info vdso_info __ro_after_init = {
+ .name = "vdso",
+ .vdso_code_start = vdso_start,
+ .vdso_code_end = vdso_end,
+ .dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR],
+ .cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO],
+};
+
+#ifdef CONFIG_COMPAT
+static struct vm_special_mapping rv_compat_vdso_maps[] __ro_after_init = {
+ [RV_VDSO_MAP_VVAR] = {
+ .name = "[vvar]",
+ .fault = vvar_fault,
+ },
+ [RV_VDSO_MAP_VDSO] = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
+ },
+};
+
+static struct __vdso_info compat_vdso_info __ro_after_init = {
+ .name = "compat_vdso",
+ .vdso_code_start = compat_vdso_start,
+ .vdso_code_end = compat_vdso_end,
+ .dm = &rv_compat_vdso_maps[RV_VDSO_MAP_VVAR],
+ .cm = &rv_compat_vdso_maps[RV_VDSO_MAP_VDSO],
+};
+#endif
+
+static int __init vdso_init(void)
+{
+ __vdso_init(&vdso_info);
+#ifdef CONFIG_COMPAT
+ __vdso_init(&compat_vdso_info);
+#endif
return 0;
}
arch_initcall(vdso_init);
-int arch_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp)
+static int __setup_additional_pages(struct mm_struct *mm,
+ struct linux_binprm *bprm,
+ int uses_interp,
+ struct __vdso_info *vdso_info)
{
- struct mm_struct *mm = current->mm;
- unsigned long vdso_base, vdso_len;
- int ret;
+ unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+ void *ret;
+
+ BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
- vdso_len = (vdso_pages + 1) << PAGE_SHIFT;
+ vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT;
+ /* Be sure to map the data page */
+ vdso_mapping_len = vdso_text_len + VVAR_SIZE;
- mmap_write_lock(mm);
- vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
+ vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
- ret = vdso_base;
- goto end;
+ ret = ERR_PTR(vdso_base);
+ goto up_fail;
}
- /*
- * Put vDSO base into mm struct. We need to do this before calling
- * install_special_mapping or the perf counter mmap tracking code
- * will fail to recognise it as a vDSO (since arch_vma_name fails).
- */
+ ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
+ (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info->dm);
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ vdso_base += VVAR_SIZE;
mm->context.vdso = (void *)vdso_base;
+ mm->context.vdso_info = (void *)vdso_info;
ret =
- install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ _install_special_mapping(mm, vdso_base, vdso_text_len,
(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
- vdso_pagelist);
+ vdso_info->cm);
- if (unlikely(ret)) {
- mm->context.vdso = NULL;
- goto end;
- }
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ return 0;
+
+up_fail:
+ mm->context.vdso = NULL;
+ return PTR_ERR(ret);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
+ int uses_interp)
+{
+ struct mm_struct *mm = current->mm;
+ int ret;
- vdso_base += (vdso_pages << PAGE_SHIFT);
- ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
- (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
- if (unlikely(ret))
- mm->context.vdso = NULL;
-end:
+ ret = __setup_additional_pages(mm, bprm, uses_interp,
+ &compat_vdso_info);
mmap_write_unlock(mm);
+
return ret;
}
+#endif
-const char *arch_vma_name(struct vm_area_struct *vma)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
- return "[vdso]";
- if (vma->vm_mm && (vma->vm_start ==
- (long)vma->vm_mm->context.vdso + PAGE_SIZE))
- return "[vdso_data]";
- return NULL;
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ ret = __setup_additional_pages(mm, bprm, uses_interp, &vdso_info);
+ mmap_write_unlock(mm);
+
+ return ret;
}
diff --git a/arch/riscv/kernel/vdso/.gitignore b/arch/riscv/kernel/vdso/.gitignore
index 11ebee9e4c1d..3a19def868ec 100644
--- a/arch/riscv/kernel/vdso/.gitignore
+++ b/arch/riscv/kernel/vdso/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
vdso.lds
*.tmp
+vdso-syms.S
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 38ba55b0eb9d..f2e065671e4d 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -16,55 +16,56 @@ vdso-syms += flush_icache
# Files to link into the vdso
obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+ccflags-y := -fno-stack-protector
+
ifneq ($(c-gettimeofday-y),)
- CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
+ CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
endif
# Build rules
-targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
-obj-y += vdso.o vdso-syms.o
+obj-y += vdso.o
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-# Disable gcov profiling for VDSO code
+# Disable -pg to prevent insert call site
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+
+# Disable profiling and instrumentation for VDSO code
GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
# Force dependency
$(obj)/vdso.o: $(obj)/vdso.so
# link rule for the .so file, .lds has to be first
-SYSCFLAGS_vdso.so.dbg = $(c_flags)
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE
- $(call if_changed,vdsold)
-
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO. With ld --just-symbols we can then
-# refer to these symbols in the kernel code rather than hand-coded addresses.
-
-SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
- -Wl,--build-id -Wl,--hash-style=both
-$(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold)
-
-LDFLAGS_vdso-syms.o := -r --just-symbols
-$(obj)/vdso-syms.o: $(obj)/vdso-dummy.o FORCE
- $(call if_changed,ld)
+LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \
+ --build-id=sha1 --hash-style=both --eh-frame-hdr
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,vdsosym)
+
# actual build commands
# The DSO images are built using a special linker script
-# Add -lgcc so rv32 gets static muldi3 and lshrdi3 definitions.
# Make sure only to export the intended __vdso_xxx symbol offsets.
quiet_cmd_vdsold = VDSOLD $@
- cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \
- -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \
- $(CROSS_COMPILE)objcopy \
- $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
+ cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \
+ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
rm $@.tmp
# install commands for the unstripped file
diff --git a/arch/riscv/kernel/vdso/gen_vdso_offsets.sh b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..c2e5613f3495
--- /dev/null
+++ b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+LC_ALL=C
+sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . \(__vdso_[a-zA-Z0-9_]*\)$/\#define \2_offset\t0x\1/p'
diff --git a/arch/riscv/kernel/vdso/vdso.S b/arch/riscv/kernel/vdso/vdso.S
index df222245be05..83f1c899e8d8 100644
--- a/arch/riscv/kernel/vdso/vdso.S
+++ b/arch/riscv/kernel/vdso/vdso.S
@@ -7,12 +7,16 @@
#include <linux/linkage.h>
#include <asm/page.h>
+#ifndef __VDSO_PATH
+#define __VDSO_PATH "arch/riscv/kernel/vdso/vdso.so"
+#endif
+
__PAGE_ALIGNED_DATA
.globl vdso_start, vdso_end
.balign PAGE_SIZE
vdso_start:
- .incbin "arch/riscv/kernel/vdso/vdso.so"
+ .incbin __VDSO_PATH
.balign PAGE_SIZE
vdso_end:
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index e6f558bca71b..01d94aae5bf5 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -3,12 +3,16 @@
* Copyright (C) 2012 Regents of the University of California
*/
#include <asm/page.h>
+#include <asm/vdso.h>
OUTPUT_ARCH(riscv)
SECTIONS
{
- PROVIDE(_vdso_data = . + PAGE_SIZE);
+ PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
diff --git a/arch/riscv/kernel/vdso/vgettimeofday.c b/arch/riscv/kernel/vdso/vgettimeofday.c
index d264943e2e47..cc0d80699c31 100644
--- a/arch/riscv/kernel/vdso/vgettimeofday.c
+++ b/arch/riscv/kernel/vdso/vgettimeofday.c
@@ -9,16 +9,22 @@
#include <linux/time.h>
#include <linux/types.h>
+extern
+int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
return __cvdso_clock_gettime(clock, ts);
}
+extern
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
return __cvdso_gettimeofday(tv, tz);
}
+extern
+int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res);
int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res)
{
return __cvdso_clock_getres(clock_id, res);
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
new file mode 100644
index 000000000000..75e0fa8a700a
--- /dev/null
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ * Copyright (C) 2020 Vitaly Wool, Konsulko AB
+ */
+
+#include <asm/pgtable.h>
+#define LOAD_OFFSET KERNEL_LINK_ADDR
+/* No __ro_after_init data in the .rodata section - which will always be ro */
+#define RO_AFTER_INIT_DATA
+
+#include <asm/vmlinux.lds.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+
+OUTPUT_ARCH(riscv)
+ENTRY(_start)
+
+jiffies = jiffies_64;
+
+SECTIONS
+{
+ /* Beginning of code and text segment */
+ . = LOAD_OFFSET;
+ _xiprom = .;
+ _start = .;
+ HEAD_TEXT_SECTION
+ INIT_TEXT_SECTION(PAGE_SIZE)
+ /* we have to discard exit text and such at runtime, not link time */
+ .exit.text :
+ {
+ EXIT_TEXT
+ }
+
+ .text : {
+ _text = .;
+ _stext = .;
+ TEXT_TEXT
+ SCHED_TEXT
+ CPUIDLE_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ ENTRY_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
+ _etext = .;
+ }
+ RO_DATA(L1_CACHE_BYTES)
+ .srodata : {
+ *(.srodata*)
+ }
+ .init.rodata : {
+ INIT_SETUP(16)
+ INIT_CALLS
+ CON_INITCALL
+ INIT_RAM_FS
+ }
+ _exiprom = .; /* End of XIP ROM area */
+
+
+/*
+ * From this point, stuff is considered writable and will be copied to RAM
+ */
+ __data_loc = ALIGN(PAGE_SIZE); /* location in file */
+ . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */
+
+#undef LOAD_OFFSET
+#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK))
+
+ _sdata = .; /* Start of data section */
+ _data = .;
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ _edata = .;
+ __start_ro_after_init = .;
+ .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
+ *(.data..ro_after_init)
+ }
+ __end_ro_after_init = .;
+
+ . = ALIGN(PAGE_SIZE);
+ __init_begin = .;
+ .init.data : {
+ INIT_DATA
+ }
+ .exit.data : {
+ EXIT_DATA
+ }
+ . = ALIGN(8);
+ __soc_early_init_table : {
+ __soc_early_init_table_start = .;
+ KEEP(*(__soc_early_init_table))
+ __soc_early_init_table_end = .;
+ }
+ __soc_builtin_dtb_table : {
+ __soc_builtin_dtb_table_start = .;
+ KEEP(*(__soc_builtin_dtb_table))
+ __soc_builtin_dtb_table_end = .;
+ }
+
+ . = ALIGN(8);
+ .alternative : {
+ __alt_start = .;
+ *(.alternative)
+ __alt_end = .;
+ }
+ __init_end = .;
+
+ . = ALIGN(16);
+ .xip.traps : {
+ __xip_traps_start = .;
+ *(.xip.traps)
+ __xip_traps_end = .;
+ }
+
+ . = ALIGN(PAGE_SIZE);
+ .sdata : {
+ __global_pointer$ = . + 0x800;
+ *(.sdata*)
+ *(.sbss*)
+ }
+
+ BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
+
+ PERCPU_SECTION(L1_CACHE_BYTES)
+
+ .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) {
+ *(.rel.dyn*)
+ }
+
+ /*
+ * End of copied data. We need a dummy section to get its LMA.
+ * Also located before final ALIGN() as trailing padding is not stored
+ * in the resulting binary file and useless to copy.
+ */
+ .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { }
+ _edata_loc = LOADADDR(.data.endmark);
+
+ . = ALIGN(PAGE_SIZE);
+ _end = .;
+
+ STABS_DEBUG
+ DWARF_DEBUG
+
+ DISCARDS
+}
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index e6f8016b366a..4e6c88aa4d87 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -4,12 +4,21 @@
* Copyright (C) 2017 SiFive
*/
-#define LOAD_OFFSET PAGE_OFFSET
+#define RO_EXCEPTION_TABLE_ALIGN 4
+
+#ifdef CONFIG_XIP_KERNEL
+#include "vmlinux-xip.lds.S"
+#else
+
+#include <asm/pgtable.h>
+#define LOAD_OFFSET KERNEL_LINK_ADDR
+
#include <asm/vmlinux.lds.h>
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/set_memory.h>
+#include "image-vars.h"
#include <linux/sizes.h>
OUTPUT_ARCH(riscv)
@@ -17,6 +26,9 @@ ENTRY(_start)
jiffies = jiffies_64;
+PECOFF_SECTION_ALIGNMENT = 0x1000;
+PECOFF_FILE_ALIGNMENT = 0x200;
+
SECTIONS
{
/* Beginning of code and text segment */
@@ -25,9 +37,29 @@ SECTIONS
HEAD_TEXT_SECTION
. = ALIGN(PAGE_SIZE);
+ .text : {
+ _text = .;
+ _stext = .;
+ TEXT_TEXT
+ SCHED_TEXT
+ CPUIDLE_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ ENTRY_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
+ _etext = .;
+ }
+
+ . = ALIGN(SECTION_ALIGN);
__init_begin = .;
- INIT_TEXT_SECTION(PAGE_SIZE)
- INIT_DATA_SECTION(16)
+ __init_text_begin = .;
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) ALIGN(SECTION_ALIGN) { \
+ _sinittext = .; \
+ INIT_TEXT \
+ _einittext = .; \
+ }
+
. = ALIGN(8);
__soc_early_init_table : {
__soc_early_init_table_start = .;
@@ -44,29 +76,36 @@ SECTIONS
{
EXIT_TEXT
}
+
+ __init_text_end = .;
+ . = ALIGN(SECTION_ALIGN);
+#ifdef CONFIG_EFI
+ . = ALIGN(PECOFF_SECTION_ALIGNMENT);
+ __pecoff_text_end = .;
+#endif
+ /* Start of init data section */
+ __init_data_begin = .;
+ INIT_DATA_SECTION(16)
.exit.data :
{
EXIT_DATA
}
PERCPU_SECTION(L1_CACHE_BYTES)
- __init_end = .;
- . = ALIGN(SECTION_ALIGN);
- .text : {
- _text = .;
- _stext = .;
- TEXT_TEXT
- SCHED_TEXT
- CPUIDLE_TEXT
- LOCK_TEXT
- KPROBES_TEXT
- ENTRY_TEXT
- IRQENTRY_TEXT
- SOFTIRQENTRY_TEXT
- *(.fixup)
- _etext = .;
+ .rel.dyn : {
+ *(.rel.dyn*)
}
+ __init_data_end = .;
+
+ . = ALIGN(8);
+ .alternative : {
+ __alt_start = .;
+ *(.alternative)
+ __alt_end = .;
+ }
+ __init_end = .;
+
/* Start of data section */
_sdata = .;
RO_DATA(SECTION_ALIGN)
@@ -74,29 +113,35 @@ SECTIONS
*(.srodata*)
}
- EXCEPTION_TABLE(0x10)
-
. = ALIGN(SECTION_ALIGN);
_data = .;
- RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
.sdata : {
__global_pointer$ = . + 0x800;
*(.sdata*)
- /* End of data section */
- _edata = .;
}
- BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
+#ifdef CONFIG_EFI
+ .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
+ __pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
+#endif
- .rel.dyn : {
- *(.rel.dyn*)
- }
+ /* End of data section */
+ _edata = .;
+
+ BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
+#ifdef CONFIG_EFI
+ . = ALIGN(PECOFF_SECTION_ALIGNMENT);
+ __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
+#endif
_end = .;
STABS_DEBUG
DWARF_DEBUG
+ ELF_DETAILS
DISCARDS
}
+#endif /* CONFIG_XIP_KERNEL */
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
new file mode 100644
index 000000000000..f5a342fa1b1d
--- /dev/null
+++ b/arch/riscv/kvm/Kconfig
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ help
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
+ depends on RISCV_SBI && MMU
+ select MMU_NOTIFIER
+ select PREEMPT_NOTIFIERS
+ select KVM_MMIO
+ select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select HAVE_KVM_EVENTFD
+ select SRCU
+ help
+ Support hosting virtualized guest machines.
+
+ If unsure, say N.
+
+endif # VIRTUALIZATION
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
new file mode 100644
index 000000000000..e5c56182f48f
--- /dev/null
+++ b/arch/riscv/kvm/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for RISC-V KVM support
+#
+
+ccflags-y += -I $(srctree)/$(src)
+
+include $(srctree)/virt/kvm/Makefile.kvm
+
+obj-$(CONFIG_KVM) += kvm.o
+
+kvm-y += main.o
+kvm-y += vm.o
+kvm-y += vmid.o
+kvm-y += tlb.o
+kvm-y += mmu.o
+kvm-y += vcpu.o
+kvm-y += vcpu_exit.o
+kvm-y += vcpu_fp.o
+kvm-y += vcpu_switch.o
+kvm-y += vcpu_sbi.o
+kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
+kvm-y += vcpu_sbi_base.o
+kvm-y += vcpu_sbi_replace.o
+kvm-y += vcpu_sbi_hsm.o
+kvm-y += vcpu_timer.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
new file mode 100644
index 000000000000..1549205fe5fe
--- /dev/null
+++ b/arch/riscv/kvm/main.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/hwcap.h>
+#include <asm/sbi.h>
+
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_check_processor_compat(void *opaque)
+{
+ return 0;
+}
+
+int kvm_arch_hardware_setup(void *opaque)
+{
+ return 0;
+}
+
+int kvm_arch_hardware_enable(void)
+{
+ unsigned long hideleg, hedeleg;
+
+ hedeleg = 0;
+ hedeleg |= (1UL << EXC_INST_MISALIGNED);
+ hedeleg |= (1UL << EXC_BREAKPOINT);
+ hedeleg |= (1UL << EXC_SYSCALL);
+ hedeleg |= (1UL << EXC_INST_PAGE_FAULT);
+ hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT);
+ hedeleg |= (1UL << EXC_STORE_PAGE_FAULT);
+ csr_write(CSR_HEDELEG, hedeleg);
+
+ hideleg = 0;
+ hideleg |= (1UL << IRQ_VS_SOFT);
+ hideleg |= (1UL << IRQ_VS_TIMER);
+ hideleg |= (1UL << IRQ_VS_EXT);
+ csr_write(CSR_HIDELEG, hideleg);
+
+ csr_write(CSR_HCOUNTEREN, -1UL);
+
+ csr_write(CSR_HVIP, 0);
+
+ return 0;
+}
+
+void kvm_arch_hardware_disable(void)
+{
+ /*
+ * After clearing the hideleg CSR, the host kernel will receive
+ * spurious interrupts if hvip CSR has pending interrupts and the
+ * corresponding enable bits in vsie CSR are asserted. To avoid it,
+ * hvip CSR and vsie CSR must be cleared before clearing hideleg CSR.
+ */
+ csr_write(CSR_VSIE, 0);
+ csr_write(CSR_HVIP, 0);
+ csr_write(CSR_HEDELEG, 0);
+ csr_write(CSR_HIDELEG, 0);
+}
+
+int kvm_arch_init(void *opaque)
+{
+ const char *str;
+
+ if (!riscv_isa_extension_available(NULL, h)) {
+ kvm_info("hypervisor extension not available\n");
+ return -ENODEV;
+ }
+
+ if (sbi_spec_is_0_1()) {
+ kvm_info("require SBI v0.2 or higher\n");
+ return -ENODEV;
+ }
+
+ if (sbi_probe_extension(SBI_EXT_RFENCE) <= 0) {
+ kvm_info("require SBI RFENCE extension\n");
+ return -ENODEV;
+ }
+
+ kvm_riscv_gstage_mode_detect();
+
+ kvm_riscv_gstage_vmid_detect();
+
+ kvm_info("hypervisor extension available\n");
+
+ switch (kvm_riscv_gstage_mode()) {
+ case HGATP_MODE_SV32X4:
+ str = "Sv32x4";
+ break;
+ case HGATP_MODE_SV39X4:
+ str = "Sv39x4";
+ break;
+ case HGATP_MODE_SV48X4:
+ str = "Sv48x4";
+ break;
+ case HGATP_MODE_SV57X4:
+ str = "Sv57x4";
+ break;
+ default:
+ return -ENODEV;
+ }
+ kvm_info("using %s G-stage page table format\n", str);
+
+ kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
+
+ return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
+
+static int riscv_kvm_init(void)
+{
+ return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+}
+module_init(riscv_kvm_init);
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
new file mode 100644
index 000000000000..1c00695ebee7
--- /dev/null
+++ b/arch/riscv/kvm/mmu.c
@@ -0,0 +1,786 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/hugetlb.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/kvm_host.h>
+#include <linux/sched/signal.h>
+#include <asm/csr.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_64BIT
+static unsigned long gstage_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
+static unsigned long gstage_pgd_levels = 3;
+#define gstage_index_bits 9
+#else
+static unsigned long gstage_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
+static unsigned long gstage_pgd_levels = 2;
+#define gstage_index_bits 10
+#endif
+
+#define gstage_pgd_xbits 2
+#define gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + gstage_pgd_xbits))
+#define gstage_gpa_bits (HGATP_PAGE_SHIFT + \
+ (gstage_pgd_levels * gstage_index_bits) + \
+ gstage_pgd_xbits)
+#define gstage_gpa_size ((gpa_t)(1ULL << gstage_gpa_bits))
+
+#define gstage_pte_leaf(__ptep) \
+ (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
+
+static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
+{
+ unsigned long mask;
+ unsigned long shift = HGATP_PAGE_SHIFT + (gstage_index_bits * level);
+
+ if (level == (gstage_pgd_levels - 1))
+ mask = (PTRS_PER_PTE * (1UL << gstage_pgd_xbits)) - 1;
+ else
+ mask = PTRS_PER_PTE - 1;
+
+ return (addr >> shift) & mask;
+}
+
+static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
+{
+ return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
+}
+
+static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
+{
+ u32 i;
+ unsigned long psz = 1UL << 12;
+
+ for (i = 0; i < gstage_pgd_levels; i++) {
+ if (page_size == (psz << (i * gstage_index_bits))) {
+ *out_level = i;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
+{
+ if (gstage_pgd_levels < level)
+ return -EINVAL;
+
+ *out_pgorder = 12 + (level * gstage_index_bits);
+ return 0;
+}
+
+static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
+{
+ int rc;
+ unsigned long page_order = PAGE_SHIFT;
+
+ rc = gstage_level_to_page_order(level, &page_order);
+ if (rc)
+ return rc;
+
+ *out_pgsize = BIT(page_order);
+ return 0;
+}
+
+static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
+ pte_t **ptepp, u32 *ptep_level)
+{
+ pte_t *ptep;
+ u32 current_level = gstage_pgd_levels - 1;
+
+ *ptep_level = current_level;
+ ptep = (pte_t *)kvm->arch.pgd;
+ ptep = &ptep[gstage_pte_index(addr, current_level)];
+ while (ptep && pte_val(*ptep)) {
+ if (gstage_pte_leaf(ptep)) {
+ *ptep_level = current_level;
+ *ptepp = ptep;
+ return true;
+ }
+
+ if (current_level) {
+ current_level--;
+ *ptep_level = current_level;
+ ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ ptep = &ptep[gstage_pte_index(addr, current_level)];
+ } else {
+ ptep = NULL;
+ }
+ }
+
+ return false;
+}
+
+static void gstage_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
+{
+ unsigned long order = PAGE_SHIFT;
+
+ if (gstage_level_to_page_order(level, &order))
+ return;
+ addr &= ~(BIT(order) - 1);
+
+ kvm_riscv_hfence_gvma_vmid_gpa(kvm, -1UL, 0, addr, BIT(order), order);
+}
+
+static int gstage_set_pte(struct kvm *kvm, u32 level,
+ struct kvm_mmu_memory_cache *pcache,
+ gpa_t addr, const pte_t *new_pte)
+{
+ u32 current_level = gstage_pgd_levels - 1;
+ pte_t *next_ptep = (pte_t *)kvm->arch.pgd;
+ pte_t *ptep = &next_ptep[gstage_pte_index(addr, current_level)];
+
+ if (current_level < level)
+ return -EINVAL;
+
+ while (current_level != level) {
+ if (gstage_pte_leaf(ptep))
+ return -EEXIST;
+
+ if (!pte_val(*ptep)) {
+ if (!pcache)
+ return -ENOMEM;
+ next_ptep = kvm_mmu_memory_cache_alloc(pcache);
+ if (!next_ptep)
+ return -ENOMEM;
+ *ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
+ __pgprot(_PAGE_TABLE));
+ } else {
+ if (gstage_pte_leaf(ptep))
+ return -EEXIST;
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ }
+
+ current_level--;
+ ptep = &next_ptep[gstage_pte_index(addr, current_level)];
+ }
+
+ *ptep = *new_pte;
+ if (gstage_pte_leaf(ptep))
+ gstage_remote_tlb_flush(kvm, current_level, addr);
+
+ return 0;
+}
+
+static int gstage_map_page(struct kvm *kvm,
+ struct kvm_mmu_memory_cache *pcache,
+ gpa_t gpa, phys_addr_t hpa,
+ unsigned long page_size,
+ bool page_rdonly, bool page_exec)
+{
+ int ret;
+ u32 level = 0;
+ pte_t new_pte;
+ pgprot_t prot;
+
+ ret = gstage_page_size_to_level(page_size, &level);
+ if (ret)
+ return ret;
+
+ /*
+ * A RISC-V implementation can choose to either:
+ * 1) Update 'A' and 'D' PTE bits in hardware
+ * 2) Generate page fault when 'A' and/or 'D' bits are not set
+ * PTE so that software can update these bits.
+ *
+ * We support both options mentioned above. To achieve this, we
+ * always set 'A' and 'D' PTE bits at time of creating G-stage
+ * mapping. To support KVM dirty page logging with both options
+ * mentioned above, we will write-protect G-stage PTEs to track
+ * dirty pages.
+ */
+
+ if (page_exec) {
+ if (page_rdonly)
+ prot = PAGE_READ_EXEC;
+ else
+ prot = PAGE_WRITE_EXEC;
+ } else {
+ if (page_rdonly)
+ prot = PAGE_READ;
+ else
+ prot = PAGE_WRITE;
+ }
+ new_pte = pfn_pte(PFN_DOWN(hpa), prot);
+ new_pte = pte_mkdirty(new_pte);
+
+ return gstage_set_pte(kvm, level, pcache, gpa, &new_pte);
+}
+
+enum gstage_op {
+ GSTAGE_OP_NOP = 0, /* Nothing */
+ GSTAGE_OP_CLEAR, /* Clear/Unmap */
+ GSTAGE_OP_WP, /* Write-protect */
+};
+
+static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
+ pte_t *ptep, u32 ptep_level, enum gstage_op op)
+{
+ int i, ret;
+ pte_t *next_ptep;
+ u32 next_ptep_level;
+ unsigned long next_page_size, page_size;
+
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
+ if (ret)
+ return;
+
+ BUG_ON(addr & (page_size - 1));
+
+ if (!pte_val(*ptep))
+ return;
+
+ if (ptep_level && !gstage_pte_leaf(ptep)) {
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ next_ptep_level = ptep_level - 1;
+ ret = gstage_level_to_page_size(next_ptep_level,
+ &next_page_size);
+ if (ret)
+ return;
+
+ if (op == GSTAGE_OP_CLEAR)
+ set_pte(ptep, __pte(0));
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ gstage_op_pte(kvm, addr + i * next_page_size,
+ &next_ptep[i], next_ptep_level, op);
+ if (op == GSTAGE_OP_CLEAR)
+ put_page(virt_to_page(next_ptep));
+ } else {
+ if (op == GSTAGE_OP_CLEAR)
+ set_pte(ptep, __pte(0));
+ else if (op == GSTAGE_OP_WP)
+ set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE));
+ gstage_remote_tlb_flush(kvm, ptep_level, addr);
+ }
+}
+
+static void gstage_unmap_range(struct kvm *kvm, gpa_t start,
+ gpa_t size, bool may_block)
+{
+ int ret;
+ pte_t *ptep;
+ u32 ptep_level;
+ bool found_leaf;
+ unsigned long page_size;
+ gpa_t addr = start, end = start + size;
+
+ while (addr < end) {
+ found_leaf = gstage_get_leaf_entry(kvm, addr,
+ &ptep, &ptep_level);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
+ if (ret)
+ break;
+
+ if (!found_leaf)
+ goto next;
+
+ if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
+ gstage_op_pte(kvm, addr, ptep,
+ ptep_level, GSTAGE_OP_CLEAR);
+
+next:
+ addr += page_size;
+
+ /*
+ * If the range is too large, release the kvm->mmu_lock
+ * to prevent starvation and lockup detector warnings.
+ */
+ if (may_block && addr < end)
+ cond_resched_lock(&kvm->mmu_lock);
+ }
+}
+
+static void gstage_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
+{
+ int ret;
+ pte_t *ptep;
+ u32 ptep_level;
+ bool found_leaf;
+ gpa_t addr = start;
+ unsigned long page_size;
+
+ while (addr < end) {
+ found_leaf = gstage_get_leaf_entry(kvm, addr,
+ &ptep, &ptep_level);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
+ if (ret)
+ break;
+
+ if (!found_leaf)
+ goto next;
+
+ if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
+ gstage_op_pte(kvm, addr, ptep,
+ ptep_level, GSTAGE_OP_WP);
+
+next:
+ addr += page_size;
+ }
+}
+
+static void gstage_wp_memory_region(struct kvm *kvm, int slot)
+{
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
+ phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
+ phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+
+ spin_lock(&kvm->mmu_lock);
+ gstage_wp_range(kvm, start, end);
+ spin_unlock(&kvm->mmu_lock);
+ kvm_flush_remote_tlbs(kvm);
+}
+
+static int gstage_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
+ unsigned long size, bool writable)
+{
+ pte_t pte;
+ int ret = 0;
+ unsigned long pfn;
+ phys_addr_t addr, end;
+ struct kvm_mmu_memory_cache pcache;
+
+ memset(&pcache, 0, sizeof(pcache));
+ pcache.gfp_zero = __GFP_ZERO;
+
+ end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
+ pfn = __phys_to_pfn(hpa);
+
+ for (addr = gpa; addr < end; addr += PAGE_SIZE) {
+ pte = pfn_pte(pfn, PAGE_KERNEL);
+
+ if (!writable)
+ pte = pte_wrprotect(pte);
+
+ ret = kvm_mmu_topup_memory_cache(&pcache, gstage_pgd_levels);
+ if (ret)
+ goto out;
+
+ spin_lock(&kvm->mmu_lock);
+ ret = gstage_set_pte(kvm, 0, &pcache, addr, &pte);
+ spin_unlock(&kvm->mmu_lock);
+ if (ret)
+ goto out;
+
+ pfn++;
+ }
+
+out:
+ kvm_mmu_free_memory_cache(&pcache);
+ return ret;
+}
+
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset,
+ unsigned long mask)
+{
+ phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+ phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
+ phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+ gstage_wp_range(kvm, start, end);
+}
+
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
+{
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+ kvm_riscv_gstage_free_pgd(kvm);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
+{
+ gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+ spin_lock(&kvm->mmu_lock);
+ gstage_unmap_range(kvm, gpa, size, false);
+ spin_unlock(&kvm->mmu_lock);
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
+{
+ /*
+ * At this point memslot has been committed and there is an
+ * allocated dirty_bitmap[], dirty pages will be tracked while
+ * the memory slot is write protected.
+ */
+ if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES)
+ gstage_wp_memory_region(kvm, new->id);
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
+{
+ hva_t hva, reg_end, size;
+ gpa_t base_gpa;
+ bool writable;
+ int ret = 0;
+
+ if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
+ change != KVM_MR_FLAGS_ONLY)
+ return 0;
+
+ /*
+ * Prevent userspace from creating a memory region outside of the GPA
+ * space addressable by the KVM guest GPA space.
+ */
+ if ((new->base_gfn + new->npages) >=
+ (gstage_gpa_size >> PAGE_SHIFT))
+ return -EFAULT;
+
+ hva = new->userspace_addr;
+ size = new->npages << PAGE_SHIFT;
+ reg_end = hva + size;
+ base_gpa = new->base_gfn << PAGE_SHIFT;
+ writable = !(new->flags & KVM_MEM_READONLY);
+
+ mmap_read_lock(current->mm);
+
+ /*
+ * A memory region could potentially cover multiple VMAs, and
+ * any holes between them, so iterate over all of them to find
+ * out if we can map any of them right now.
+ *
+ * +--------------------------------------------+
+ * +---------------+----------------+ +----------------+
+ * | : VMA 1 | VMA 2 | | VMA 3 : |
+ * +---------------+----------------+ +----------------+
+ * | memory region |
+ * +--------------------------------------------+
+ */
+ do {
+ struct vm_area_struct *vma = find_vma(current->mm, hva);
+ hva_t vm_start, vm_end;
+
+ if (!vma || vma->vm_start >= reg_end)
+ break;
+
+ /*
+ * Mapping a read-only VMA is only allowed if the
+ * memory region is configured as read-only.
+ */
+ if (writable && !(vma->vm_flags & VM_WRITE)) {
+ ret = -EPERM;
+ break;
+ }
+
+ /* Take the intersection of this VMA with the memory region */
+ vm_start = max(hva, vma->vm_start);
+ vm_end = min(reg_end, vma->vm_end);
+
+ if (vma->vm_flags & VM_PFNMAP) {
+ gpa_t gpa = base_gpa + (vm_start - hva);
+ phys_addr_t pa;
+
+ pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+ pa += vm_start - vma->vm_start;
+
+ /* IO region dirty page logging not allowed */
+ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = gstage_ioremap(kvm, gpa, pa,
+ vm_end - vm_start, writable);
+ if (ret)
+ break;
+ }
+ hva = vm_end;
+ } while (hva < reg_end);
+
+ if (change == KVM_MR_FLAGS_ONLY)
+ goto out;
+
+ spin_lock(&kvm->mmu_lock);
+ if (ret)
+ gstage_unmap_range(kvm, base_gpa, size, false);
+ spin_unlock(&kvm->mmu_lock);
+
+out:
+ mmap_read_unlock(current->mm);
+ return ret;
+}
+
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ if (!kvm->arch.pgd)
+ return false;
+
+ gstage_unmap_range(kvm, range->start << PAGE_SHIFT,
+ (range->end - range->start) << PAGE_SHIFT,
+ range->may_block);
+ return false;
+}
+
+bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ int ret;
+ kvm_pfn_t pfn = pte_pfn(range->pte);
+
+ if (!kvm->arch.pgd)
+ return false;
+
+ WARN_ON(range->end - range->start != 1);
+
+ ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT,
+ __pfn_to_phys(pfn), PAGE_SIZE, true, true);
+ if (ret) {
+ kvm_debug("Failed to map G-stage page (error %d)\n", ret);
+ return true;
+ }
+
+ return false;
+}
+
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ pte_t *ptep;
+ u32 ptep_level = 0;
+ u64 size = (range->end - range->start) << PAGE_SHIFT;
+
+ if (!kvm->arch.pgd)
+ return false;
+
+ WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+
+ if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ &ptep, &ptep_level))
+ return false;
+
+ return ptep_test_and_clear_young(NULL, 0, ptep);
+}
+
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ pte_t *ptep;
+ u32 ptep_level = 0;
+ u64 size = (range->end - range->start) << PAGE_SHIFT;
+
+ if (!kvm->arch.pgd)
+ return false;
+
+ WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+
+ if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ &ptep, &ptep_level))
+ return false;
+
+ return pte_young(*ptep);
+}
+
+int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
+ struct kvm_memory_slot *memslot,
+ gpa_t gpa, unsigned long hva, bool is_write)
+{
+ int ret;
+ kvm_pfn_t hfn;
+ bool writeable;
+ short vma_pageshift;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct vm_area_struct *vma;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_mmu_memory_cache *pcache = &vcpu->arch.mmu_page_cache;
+ bool logging = (memslot->dirty_bitmap &&
+ !(memslot->flags & KVM_MEM_READONLY)) ? true : false;
+ unsigned long vma_pagesize, mmu_seq;
+
+ mmap_read_lock(current->mm);
+
+ vma = find_vma_intersection(current->mm, hva, hva + 1);
+ if (unlikely(!vma)) {
+ kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+ mmap_read_unlock(current->mm);
+ return -EFAULT;
+ }
+
+ if (is_vm_hugetlb_page(vma))
+ vma_pageshift = huge_page_shift(hstate_vma(vma));
+ else
+ vma_pageshift = PAGE_SHIFT;
+ vma_pagesize = 1ULL << vma_pageshift;
+ if (logging || (vma->vm_flags & VM_PFNMAP))
+ vma_pagesize = PAGE_SIZE;
+
+ if (vma_pagesize == PMD_SIZE || vma_pagesize == PGDIR_SIZE)
+ gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
+
+ mmap_read_unlock(current->mm);
+
+ if (vma_pagesize != PGDIR_SIZE &&
+ vma_pagesize != PMD_SIZE &&
+ vma_pagesize != PAGE_SIZE) {
+ kvm_err("Invalid VMA page size 0x%lx\n", vma_pagesize);
+ return -EFAULT;
+ }
+
+ /* We need minimum second+third level pages */
+ ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels);
+ if (ret) {
+ kvm_err("Failed to topup G-stage cache\n");
+ return ret;
+ }
+
+ mmu_seq = kvm->mmu_notifier_seq;
+
+ hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable);
+ if (hfn == KVM_PFN_ERR_HWPOISON) {
+ send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
+ vma_pageshift, current);
+ return 0;
+ }
+ if (is_error_noslot_pfn(hfn))
+ return -EFAULT;
+
+ /*
+ * If logging is active then we allow writable pages only
+ * for write faults.
+ */
+ if (logging && !is_write)
+ writeable = false;
+
+ spin_lock(&kvm->mmu_lock);
+
+ if (mmu_notifier_retry(kvm, mmu_seq))
+ goto out_unlock;
+
+ if (writeable) {
+ kvm_set_pfn_dirty(hfn);
+ mark_page_dirty(kvm, gfn);
+ ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
+ vma_pagesize, false, true);
+ } else {
+ ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
+ vma_pagesize, true, true);
+ }
+
+ if (ret)
+ kvm_err("Failed to map in G-stage\n");
+
+out_unlock:
+ spin_unlock(&kvm->mmu_lock);
+ kvm_set_pfn_accessed(hfn);
+ kvm_release_pfn_clean(hfn);
+ return ret;
+}
+
+int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm)
+{
+ struct page *pgd_page;
+
+ if (kvm->arch.pgd != NULL) {
+ kvm_err("kvm_arch already initialized?\n");
+ return -EINVAL;
+ }
+
+ pgd_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(gstage_pgd_size));
+ if (!pgd_page)
+ return -ENOMEM;
+ kvm->arch.pgd = page_to_virt(pgd_page);
+ kvm->arch.pgd_phys = page_to_phys(pgd_page);
+
+ return 0;
+}
+
+void kvm_riscv_gstage_free_pgd(struct kvm *kvm)
+{
+ void *pgd = NULL;
+
+ spin_lock(&kvm->mmu_lock);
+ if (kvm->arch.pgd) {
+ gstage_unmap_range(kvm, 0UL, gstage_gpa_size, false);
+ pgd = READ_ONCE(kvm->arch.pgd);
+ kvm->arch.pgd = NULL;
+ kvm->arch.pgd_phys = 0;
+ }
+ spin_unlock(&kvm->mmu_lock);
+
+ if (pgd)
+ free_pages((unsigned long)pgd, get_order(gstage_pgd_size));
+}
+
+void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu)
+{
+ unsigned long hgatp = gstage_mode;
+ struct kvm_arch *k = &vcpu->kvm->arch;
+
+ hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) &
+ HGATP_VMID_MASK;
+ hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
+
+ csr_write(CSR_HGATP, hgatp);
+
+ if (!kvm_riscv_gstage_vmid_bits())
+ kvm_riscv_local_hfence_gvma_all();
+}
+
+void kvm_riscv_gstage_mode_detect(void)
+{
+#ifdef CONFIG_64BIT
+ /* Try Sv57x4 G-stage mode */
+ csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
+ if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
+ gstage_mode = (HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
+ gstage_pgd_levels = 5;
+ goto skip_sv48x4_test;
+ }
+
+ /* Try Sv48x4 G-stage mode */
+ csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
+ if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
+ gstage_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
+ gstage_pgd_levels = 4;
+ }
+skip_sv48x4_test:
+
+ csr_write(CSR_HGATP, 0);
+ kvm_riscv_local_hfence_gvma_all();
+#endif
+}
+
+unsigned long kvm_riscv_gstage_mode(void)
+{
+ return gstage_mode >> HGATP_MODE_SHIFT;
+}
+
+int kvm_riscv_gstage_gpa_bits(void)
+{
+ return gstage_gpa_bits;
+}
diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
new file mode 100644
index 000000000000..1a76d0b1907d
--- /dev/null
+++ b/arch/riscv/kvm/tlb.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/kvm_host.h>
+#include <asm/cacheflush.h>
+#include <asm/csr.h>
+
+/*
+ * Instruction encoding of hfence.gvma is:
+ * HFENCE.GVMA rs1, rs2
+ * HFENCE.GVMA zero, rs2
+ * HFENCE.GVMA rs1
+ * HFENCE.GVMA
+ *
+ * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2
+ * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2
+ * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1
+ * rs1==zero and rs2==zero ==> HFENCE.GVMA
+ *
+ * Instruction encoding of HFENCE.GVMA is:
+ * 0110001 rs2(5) rs1(5) 000 00000 1110011
+ */
+
+void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order)
+{
+ gpa_t pos;
+
+ if (PTRS_PER_PTE < (gpsz >> order)) {
+ kvm_riscv_local_hfence_gvma_vmid_all(vmid);
+ return;
+ }
+
+ for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) {
+ /*
+ * rs1 = a0 (GPA >> 2)
+ * rs2 = a1 (VMID)
+ * HFENCE.GVMA a0, a1
+ * 0110001 01011 01010 000 00000 1110011
+ */
+ asm volatile ("srli a0, %0, 2\n"
+ "add a1, %1, zero\n"
+ ".word 0x62b50073\n"
+ :: "r" (pos), "r" (vmid)
+ : "a0", "a1", "memory");
+ }
+}
+
+void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid)
+{
+ /*
+ * rs1 = zero
+ * rs2 = a0 (VMID)
+ * HFENCE.GVMA zero, a0
+ * 0110001 01010 00000 000 00000 1110011
+ */
+ asm volatile ("add a0, %0, zero\n"
+ ".word 0x62a00073\n"
+ :: "r" (vmid) : "a0", "memory");
+}
+
+void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz,
+ unsigned long order)
+{
+ gpa_t pos;
+
+ if (PTRS_PER_PTE < (gpsz >> order)) {
+ kvm_riscv_local_hfence_gvma_all();
+ return;
+ }
+
+ for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) {
+ /*
+ * rs1 = a0 (GPA >> 2)
+ * rs2 = zero
+ * HFENCE.GVMA a0
+ * 0110001 00000 01010 000 00000 1110011
+ */
+ asm volatile ("srli a0, %0, 2\n"
+ ".word 0x62050073\n"
+ :: "r" (pos) : "a0", "memory");
+ }
+}
+
+void kvm_riscv_local_hfence_gvma_all(void)
+{
+ /*
+ * rs1 = zero
+ * rs2 = zero
+ * HFENCE.GVMA
+ * 0110001 00000 00000 000 00000 1110011
+ */
+ asm volatile (".word 0x62000073" ::: "memory");
+}
+
+/*
+ * Instruction encoding of hfence.gvma is:
+ * HFENCE.VVMA rs1, rs2
+ * HFENCE.VVMA zero, rs2
+ * HFENCE.VVMA rs1
+ * HFENCE.VVMA
+ *
+ * rs1!=zero and rs2!=zero ==> HFENCE.VVMA rs1, rs2
+ * rs1==zero and rs2!=zero ==> HFENCE.VVMA zero, rs2
+ * rs1!=zero and rs2==zero ==> HFENCE.VVMA rs1
+ * rs1==zero and rs2==zero ==> HFENCE.VVMA
+ *
+ * Instruction encoding of HFENCE.VVMA is:
+ * 0010001 rs2(5) rs1(5) 000 00000 1110011
+ */
+
+void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid,
+ unsigned long asid,
+ unsigned long gva,
+ unsigned long gvsz,
+ unsigned long order)
+{
+ unsigned long pos, hgatp;
+
+ if (PTRS_PER_PTE < (gvsz >> order)) {
+ kvm_riscv_local_hfence_vvma_asid_all(vmid, asid);
+ return;
+ }
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) {
+ /*
+ * rs1 = a0 (GVA)
+ * rs2 = a1 (ASID)
+ * HFENCE.VVMA a0, a1
+ * 0010001 01011 01010 000 00000 1110011
+ */
+ asm volatile ("add a0, %0, zero\n"
+ "add a1, %1, zero\n"
+ ".word 0x22b50073\n"
+ :: "r" (pos), "r" (asid)
+ : "a0", "a1", "memory");
+ }
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid,
+ unsigned long asid)
+{
+ unsigned long hgatp;
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ /*
+ * rs1 = zero
+ * rs2 = a0 (ASID)
+ * HFENCE.VVMA zero, a0
+ * 0010001 01010 00000 000 00000 1110011
+ */
+ asm volatile ("add a0, %0, zero\n"
+ ".word 0x22a00073\n"
+ :: "r" (asid) : "a0", "memory");
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order)
+{
+ unsigned long pos, hgatp;
+
+ if (PTRS_PER_PTE < (gvsz >> order)) {
+ kvm_riscv_local_hfence_vvma_all(vmid);
+ return;
+ }
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) {
+ /*
+ * rs1 = a0 (GVA)
+ * rs2 = zero
+ * HFENCE.VVMA a0
+ * 0010001 00000 01010 000 00000 1110011
+ */
+ asm volatile ("add a0, %0, zero\n"
+ ".word 0x22050073\n"
+ :: "r" (pos) : "a0", "memory");
+ }
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_hfence_vvma_all(unsigned long vmid)
+{
+ unsigned long hgatp;
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ /*
+ * rs1 = zero
+ * rs2 = zero
+ * HFENCE.VVMA
+ * 0010001 00000 00000 000 00000 1110011
+ */
+ asm volatile (".word 0x22000073" ::: "memory");
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu)
+{
+ unsigned long vmid;
+
+ if (!kvm_riscv_gstage_vmid_bits() ||
+ vcpu->arch.last_exit_cpu == vcpu->cpu)
+ return;
+
+ /*
+ * On RISC-V platforms with hardware VMID support, we share same
+ * VMID for all VCPUs of a particular Guest/VM. This means we might
+ * have stale G-stage TLB entries on the current Host CPU due to
+ * some other VCPU of the same Guest which ran previously on the
+ * current Host CPU.
+ *
+ * To cleanup stale TLB entries, we simply flush all G-stage TLB
+ * entries by VMID whenever underlying Host CPU changes for a VCPU.
+ */
+
+ vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
+ kvm_riscv_local_hfence_gvma_vmid_all(vmid);
+}
+
+void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu)
+{
+ local_flush_icache_all();
+}
+
+void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vmid *vmid;
+
+ vmid = &vcpu->kvm->arch.vmid;
+ kvm_riscv_local_hfence_gvma_vmid_all(READ_ONCE(vmid->vmid));
+}
+
+void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vmid *vmid;
+
+ vmid = &vcpu->kvm->arch.vmid;
+ kvm_riscv_local_hfence_vvma_all(READ_ONCE(vmid->vmid));
+}
+
+static bool vcpu_hfence_dequeue(struct kvm_vcpu *vcpu,
+ struct kvm_riscv_hfence *out_data)
+{
+ bool ret = false;
+ struct kvm_vcpu_arch *varch = &vcpu->arch;
+
+ spin_lock(&varch->hfence_lock);
+
+ if (varch->hfence_queue[varch->hfence_head].type) {
+ memcpy(out_data, &varch->hfence_queue[varch->hfence_head],
+ sizeof(*out_data));
+ varch->hfence_queue[varch->hfence_head].type = 0;
+
+ varch->hfence_head++;
+ if (varch->hfence_head == KVM_RISCV_VCPU_MAX_HFENCE)
+ varch->hfence_head = 0;
+
+ ret = true;
+ }
+
+ spin_unlock(&varch->hfence_lock);
+
+ return ret;
+}
+
+static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu,
+ const struct kvm_riscv_hfence *data)
+{
+ bool ret = false;
+ struct kvm_vcpu_arch *varch = &vcpu->arch;
+
+ spin_lock(&varch->hfence_lock);
+
+ if (!varch->hfence_queue[varch->hfence_tail].type) {
+ memcpy(&varch->hfence_queue[varch->hfence_tail],
+ data, sizeof(*data));
+
+ varch->hfence_tail++;
+ if (varch->hfence_tail == KVM_RISCV_VCPU_MAX_HFENCE)
+ varch->hfence_tail = 0;
+
+ ret = true;
+ }
+
+ spin_unlock(&varch->hfence_lock);
+
+ return ret;
+}
+
+void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu)
+{
+ struct kvm_riscv_hfence d = { 0 };
+ struct kvm_vmid *v = &vcpu->kvm->arch.vmid;
+
+ while (vcpu_hfence_dequeue(vcpu, &d)) {
+ switch (d.type) {
+ case KVM_RISCV_HFENCE_UNKNOWN:
+ break;
+ case KVM_RISCV_HFENCE_GVMA_VMID_GPA:
+ kvm_riscv_local_hfence_gvma_vmid_gpa(
+ READ_ONCE(v->vmid),
+ d.addr, d.size, d.order);
+ break;
+ case KVM_RISCV_HFENCE_VVMA_ASID_GVA:
+ kvm_riscv_local_hfence_vvma_asid_gva(
+ READ_ONCE(v->vmid), d.asid,
+ d.addr, d.size, d.order);
+ break;
+ case KVM_RISCV_HFENCE_VVMA_ASID_ALL:
+ kvm_riscv_local_hfence_vvma_asid_all(
+ READ_ONCE(v->vmid), d.asid);
+ break;
+ case KVM_RISCV_HFENCE_VVMA_GVA:
+ kvm_riscv_local_hfence_vvma_gva(
+ READ_ONCE(v->vmid),
+ d.addr, d.size, d.order);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void make_xfence_request(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned int req, unsigned int fallback_req,
+ const struct kvm_riscv_hfence *data)
+{
+ unsigned long i;
+ struct kvm_vcpu *vcpu;
+ unsigned int actual_req = req;
+ DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
+
+ bitmap_clear(vcpu_mask, 0, KVM_MAX_VCPUS);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (hbase != -1UL) {
+ if (vcpu->vcpu_id < hbase)
+ continue;
+ if (!(hmask & (1UL << (vcpu->vcpu_id - hbase))))
+ continue;
+ }
+
+ bitmap_set(vcpu_mask, i, 1);
+
+ if (!data || !data->type)
+ continue;
+
+ /*
+ * Enqueue hfence data to VCPU hfence queue. If we don't
+ * have space in the VCPU hfence queue then fallback to
+ * a more conservative hfence request.
+ */
+ if (!vcpu_hfence_enqueue(vcpu, data))
+ actual_req = fallback_req;
+ }
+
+ kvm_make_vcpus_request_mask(kvm, actual_req, vcpu_mask);
+}
+
+void kvm_riscv_fence_i(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask)
+{
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_FENCE_I,
+ KVM_REQ_FENCE_I, NULL);
+}
+
+void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_GVMA_VMID_GPA;
+ data.asid = 0;
+ data.addr = gpa;
+ data.size = gpsz;
+ data.order = order;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_GVMA_VMID_ALL, &data);
+}
+
+void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask)
+{
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_GVMA_VMID_ALL,
+ KVM_REQ_HFENCE_GVMA_VMID_ALL, NULL);
+}
+
+void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order, unsigned long asid)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_VVMA_ASID_GVA;
+ data.asid = asid;
+ data.addr = gva;
+ data.size = gvsz;
+ data.order = order;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long asid)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_VVMA_ASID_ALL;
+ data.asid = asid;
+ data.addr = data.size = data.order = 0;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+void kvm_riscv_hfence_vvma_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_VVMA_GVA;
+ data.asid = 0;
+ data.addr = gva;
+ data.size = gvsz;
+ data.order = order;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+void kvm_riscv_hfence_vvma_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask)
+{
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_VVMA_ALL,
+ KVM_REQ_HFENCE_VVMA_ALL, NULL);
+}
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
new file mode 100644
index 000000000000..7f4ad5e4373a
--- /dev/null
+++ b/arch/riscv/kvm/vcpu.c
@@ -0,0 +1,994 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/signal.h>
+#include <linux/fs.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/hwcap.h>
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
+ STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
+ STATS_DESC_COUNTER(VCPU, mmio_exit_user),
+ STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
+ STATS_DESC_COUNTER(VCPU, exits)
+};
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
+};
+
+#define KVM_RISCV_ISA_DISABLE_ALLOWED (riscv_isa_extension_mask(d) | \
+ riscv_isa_extension_mask(f))
+
+#define KVM_RISCV_ISA_DISABLE_NOT_ALLOWED (riscv_isa_extension_mask(a) | \
+ riscv_isa_extension_mask(c) | \
+ riscv_isa_extension_mask(i) | \
+ riscv_isa_extension_mask(m))
+
+#define KVM_RISCV_ISA_ALLOWED (KVM_RISCV_ISA_DISABLE_ALLOWED | \
+ KVM_RISCV_ISA_DISABLE_NOT_ALLOWED)
+
+static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+ struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
+ bool loaded;
+
+ /**
+ * The preemption should be disabled here because it races with
+ * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which
+ * also calls vcpu_load/put.
+ */
+ get_cpu();
+ loaded = (vcpu->cpu != -1);
+ if (loaded)
+ kvm_arch_vcpu_put(vcpu);
+
+ vcpu->arch.last_exit_cpu = -1;
+
+ memcpy(csr, reset_csr, sizeof(*csr));
+
+ memcpy(cntx, reset_cntx, sizeof(*cntx));
+
+ kvm_riscv_vcpu_fp_reset(vcpu);
+
+ kvm_riscv_vcpu_timer_reset(vcpu);
+
+ WRITE_ONCE(vcpu->arch.irqs_pending, 0);
+ WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
+
+ vcpu->arch.hfence_head = 0;
+ vcpu->arch.hfence_tail = 0;
+ memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
+
+ /* Reset the guest CSRs for hotplug usecase */
+ if (loaded)
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ put_cpu();
+}
+
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *cntx;
+ struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
+
+ /* Mark this VCPU never ran */
+ vcpu->arch.ran_atleast_once = false;
+ vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
+
+ /* Setup ISA features available to VCPU */
+ vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
+
+ /* Setup VCPU hfence queue */
+ spin_lock_init(&vcpu->arch.hfence_lock);
+
+ /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
+ cntx = &vcpu->arch.guest_reset_context;
+ cntx->sstatus = SR_SPP | SR_SPIE;
+ cntx->hstatus = 0;
+ cntx->hstatus |= HSTATUS_VTW;
+ cntx->hstatus |= HSTATUS_SPVP;
+ cntx->hstatus |= HSTATUS_SPV;
+
+ /* By default, make CY, TM, and IR counters accessible in VU mode */
+ reset_csr->scounteren = 0x7;
+
+ /* Setup VCPU timer */
+ kvm_riscv_vcpu_timer_init(vcpu);
+
+ /* Reset VCPU */
+ kvm_riscv_reset_vcpu(vcpu);
+
+ return 0;
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+ /**
+ * vcpu with id 0 is the designated boot cpu.
+ * Keep all vcpus with non-zero id in power-off state so that
+ * they can be brought up using SBI HSM extension.
+ */
+ if (vcpu->vcpu_idx != 0)
+ kvm_riscv_vcpu_power_off(vcpu);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ /* Cleanup VCPU timer */
+ kvm_riscv_vcpu_timer_deinit(vcpu);
+
+ /* Free unused pages pre-allocated for G-stage page table mappings */
+ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER);
+}
+
+void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
+ !vcpu->arch.power_off && !vcpu->arch.pause);
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
+}
+
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
+}
+
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
+static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CONFIG);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_CONFIG_REG(isa):
+ reg_val = vcpu->arch.isa;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CONFIG);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_CONFIG_REG(isa):
+ if (!vcpu->arch.ran_atleast_once) {
+ /* Ignore the disable request for these extensions */
+ vcpu->arch.isa = reg_val | KVM_RISCV_ISA_DISABLE_NOT_ALLOWED;
+ vcpu->arch.isa &= riscv_isa_extension_base(NULL);
+ vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
+ kvm_riscv_vcpu_fp_reset(vcpu);
+ } else {
+ return -EOPNOTSUPP;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CORE);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
+ reg_val = cntx->sepc;
+ else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+ reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
+ reg_val = ((unsigned long *)cntx)[reg_num];
+ else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
+ reg_val = (cntx->sstatus & SR_SPP) ?
+ KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
+ else
+ return -EINVAL;
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CORE);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
+ cntx->sepc = reg_val;
+ else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+ reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
+ ((unsigned long *)cntx)[reg_num] = reg_val;
+ else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
+ if (reg_val == KVM_RISCV_MODE_S)
+ cntx->sstatus |= SR_SPP;
+ else
+ cntx->sstatus &= ~SR_SPP;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CSR);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
+ kvm_riscv_vcpu_flush_interrupts(vcpu);
+ reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
+ } else
+ reg_val = ((unsigned long *)csr)[reg_num];
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CSR);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
+ reg_val &= VSIP_VALID_MASK;
+ reg_val <<= VSIP_TO_HVIP_SHIFT;
+ }
+
+ ((unsigned long *)csr)[reg_num] = reg_val;
+
+ if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
+ WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
+
+ return 0;
+}
+
+/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
+static unsigned long kvm_isa_ext_arr[] = {
+ RISCV_ISA_EXT_a,
+ RISCV_ISA_EXT_c,
+ RISCV_ISA_EXT_d,
+ RISCV_ISA_EXT_f,
+ RISCV_ISA_EXT_h,
+ RISCV_ISA_EXT_i,
+ RISCV_ISA_EXT_m,
+};
+
+static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_ISA_EXT);
+ unsigned long reg_val = 0;
+ unsigned long host_isa_ext;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num >= KVM_RISCV_ISA_EXT_MAX || reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -EINVAL;
+
+ host_isa_ext = kvm_isa_ext_arr[reg_num];
+ if (__riscv_isa_extension_available(&vcpu->arch.isa, host_isa_ext))
+ reg_val = 1; /* Mark the given extension as available */
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_ISA_EXT);
+ unsigned long reg_val;
+ unsigned long host_isa_ext;
+ unsigned long host_isa_ext_mask;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num >= KVM_RISCV_ISA_EXT_MAX || reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ host_isa_ext = kvm_isa_ext_arr[reg_num];
+ if (!__riscv_isa_extension_available(NULL, host_isa_ext))
+ return -EOPNOTSUPP;
+
+ if (host_isa_ext >= RISCV_ISA_EXT_BASE &&
+ host_isa_ext < RISCV_ISA_EXT_MAX) {
+ /*
+ * Multi-letter ISA extension. Currently there is no provision
+ * to enable/disable the multi-letter ISA extensions for guests.
+ * Return success if the request is to enable any ISA extension
+ * that is available in the hardware.
+ * Return -EOPNOTSUPP otherwise.
+ */
+ if (!reg_val)
+ return -EOPNOTSUPP;
+ else
+ return 0;
+ }
+
+ /* Single letter base ISA extension */
+ if (!vcpu->arch.ran_atleast_once) {
+ host_isa_ext_mask = BIT_MASK(host_isa_ext);
+ if (!reg_val && (host_isa_ext_mask & KVM_RISCV_ISA_DISABLE_ALLOWED))
+ vcpu->arch.isa &= ~host_isa_ext_mask;
+ else
+ vcpu->arch.isa |= host_isa_ext_mask;
+ vcpu->arch.isa &= riscv_isa_extension_base(NULL);
+ vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
+ kvm_riscv_vcpu_fp_reset(vcpu);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
+ return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
+ return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
+ return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
+ return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
+ return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_F);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
+ return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_D);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
+ return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
+
+ return -EINVAL;
+}
+
+static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
+ return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
+ return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
+ return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
+ return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
+ return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_F);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
+ return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_D);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
+ return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
+
+ return -EINVAL;
+}
+
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+
+ if (ioctl == KVM_INTERRUPT) {
+ struct kvm_interrupt irq;
+
+ if (copy_from_user(&irq, argp, sizeof(irq)))
+ return -EFAULT;
+
+ if (irq.irq == KVM_INTERRUPT_SET)
+ return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
+ else
+ return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+ }
+
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r = -EINVAL;
+
+ switch (ioctl) {
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG: {
+ struct kvm_one_reg reg;
+
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ break;
+
+ if (ioctl == KVM_SET_ONE_REG)
+ r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
+ else
+ r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
+ break;
+ }
+ default:
+ break;
+ }
+
+ return r;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+ unsigned long mask, val;
+
+ if (READ_ONCE(vcpu->arch.irqs_pending_mask)) {
+ mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0);
+ val = READ_ONCE(vcpu->arch.irqs_pending) & mask;
+
+ csr->hvip &= ~mask;
+ csr->hvip |= val;
+ }
+}
+
+void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
+{
+ unsigned long hvip;
+ struct kvm_vcpu_arch *v = &vcpu->arch;
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ /* Read current HVIP and VSIE CSRs */
+ csr->vsie = csr_read(CSR_VSIE);
+
+ /* Sync-up HVIP.VSSIP bit changes does by Guest */
+ hvip = csr_read(CSR_HVIP);
+ if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
+ if (hvip & (1UL << IRQ_VS_SOFT)) {
+ if (!test_and_set_bit(IRQ_VS_SOFT,
+ &v->irqs_pending_mask))
+ set_bit(IRQ_VS_SOFT, &v->irqs_pending);
+ } else {
+ if (!test_and_set_bit(IRQ_VS_SOFT,
+ &v->irqs_pending_mask))
+ clear_bit(IRQ_VS_SOFT, &v->irqs_pending);
+ }
+ }
+}
+
+int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
+{
+ if (irq != IRQ_VS_SOFT &&
+ irq != IRQ_VS_TIMER &&
+ irq != IRQ_VS_EXT)
+ return -EINVAL;
+
+ set_bit(irq, &vcpu->arch.irqs_pending);
+ smp_mb__before_atomic();
+ set_bit(irq, &vcpu->arch.irqs_pending_mask);
+
+ kvm_vcpu_kick(vcpu);
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
+{
+ if (irq != IRQ_VS_SOFT &&
+ irq != IRQ_VS_TIMER &&
+ irq != IRQ_VS_EXT)
+ return -EINVAL;
+
+ clear_bit(irq, &vcpu->arch.irqs_pending);
+ smp_mb__before_atomic();
+ set_bit(irq, &vcpu->arch.irqs_pending_mask);
+
+ return 0;
+}
+
+bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask)
+{
+ unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
+ << VSIP_TO_HVIP_SHIFT) & mask;
+
+ return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false;
+}
+
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.power_off = true;
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.power_off = false;
+ kvm_vcpu_wake_up(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ if (vcpu->arch.power_off)
+ mp_state->mp_state = KVM_MP_STATE_STOPPED;
+ else
+ mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ int ret = 0;
+
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_RUNNABLE:
+ vcpu->arch.power_off = false;
+ break;
+ case KVM_MP_STATE_STOPPED:
+ kvm_riscv_vcpu_power_off(vcpu);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ /* TODO; To be implemented later. */
+ return -EINVAL;
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ csr_write(CSR_VSSTATUS, csr->vsstatus);
+ csr_write(CSR_VSIE, csr->vsie);
+ csr_write(CSR_VSTVEC, csr->vstvec);
+ csr_write(CSR_VSSCRATCH, csr->vsscratch);
+ csr_write(CSR_VSEPC, csr->vsepc);
+ csr_write(CSR_VSCAUSE, csr->vscause);
+ csr_write(CSR_VSTVAL, csr->vstval);
+ csr_write(CSR_HVIP, csr->hvip);
+ csr_write(CSR_VSATP, csr->vsatp);
+
+ kvm_riscv_gstage_update_hgatp(vcpu);
+
+ kvm_riscv_vcpu_timer_restore(vcpu);
+
+ kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
+ kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
+ vcpu->arch.isa);
+
+ vcpu->cpu = cpu;
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ vcpu->cpu = -1;
+
+ kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
+ vcpu->arch.isa);
+ kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
+
+ csr->vsstatus = csr_read(CSR_VSSTATUS);
+ csr->vsie = csr_read(CSR_VSIE);
+ csr->vstvec = csr_read(CSR_VSTVEC);
+ csr->vsscratch = csr_read(CSR_VSSCRATCH);
+ csr->vsepc = csr_read(CSR_VSEPC);
+ csr->vscause = csr_read(CSR_VSCAUSE);
+ csr->vstval = csr_read(CSR_VSTVAL);
+ csr->hvip = csr_read(CSR_HVIP);
+ csr->vsatp = csr_read(CSR_VSATP);
+}
+
+static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
+
+ if (kvm_request_pending(vcpu)) {
+ if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
+ rcuwait_wait_event(wait,
+ (!vcpu->arch.power_off) && (!vcpu->arch.pause),
+ TASK_INTERRUPTIBLE);
+
+ if (vcpu->arch.power_off || vcpu->arch.pause) {
+ /*
+ * Awaken to handle a signal, request to
+ * sleep again later.
+ */
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ }
+ }
+
+ if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
+ kvm_riscv_reset_vcpu(vcpu);
+
+ if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
+ kvm_riscv_gstage_update_hgatp(vcpu);
+
+ if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
+ kvm_riscv_fence_i_process(vcpu);
+
+ /*
+ * The generic KVM_REQ_TLB_FLUSH is same as
+ * KVM_REQ_HFENCE_GVMA_VMID_ALL
+ */
+ if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
+ kvm_riscv_hfence_gvma_vmid_all_process(vcpu);
+
+ if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
+ kvm_riscv_hfence_vvma_all_process(vcpu);
+
+ if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
+ kvm_riscv_hfence_process(vcpu);
+ }
+}
+
+static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ csr_write(CSR_HVIP, csr->hvip);
+}
+
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+ guest_state_enter_irqoff();
+ __kvm_riscv_switch_to(&vcpu->arch);
+ vcpu->arch.last_exit_cpu = vcpu->cpu;
+ guest_state_exit_irqoff();
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+{
+ int ret;
+ struct kvm_cpu_trap trap;
+ struct kvm_run *run = vcpu->run;
+
+ /* Mark this VCPU ran at least once */
+ vcpu->arch.ran_atleast_once = true;
+
+ kvm_vcpu_srcu_read_lock(vcpu);
+
+ /* Process MMIO value returned from user-space */
+ if (run->exit_reason == KVM_EXIT_MMIO) {
+ ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
+ if (ret) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ return ret;
+ }
+ }
+
+ /* Process SBI value returned from user-space */
+ if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
+ ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
+ if (ret) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ return ret;
+ }
+ }
+
+ if (run->immediate_exit) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ return -EINTR;
+ }
+
+ vcpu_load(vcpu);
+
+ kvm_sigset_activate(vcpu);
+
+ ret = 1;
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ while (ret > 0) {
+ /* Check conditions before entering the guest */
+ cond_resched();
+
+ kvm_riscv_gstage_vmid_update(vcpu);
+
+ kvm_riscv_check_vcpu_requests(vcpu);
+
+ preempt_disable();
+
+ local_irq_disable();
+
+ /*
+ * Exit if we have a signal pending so that we can deliver
+ * the signal to user space.
+ */
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ run->exit_reason = KVM_EXIT_INTR;
+ }
+
+ /*
+ * Ensure we set mode to IN_GUEST_MODE after we disable
+ * interrupts and before the final VCPU requests check.
+ * See the comment in kvm_vcpu_exiting_guest_mode() and
+ * Documentation/virt/kvm/vcpu-requests.rst
+ */
+ vcpu->mode = IN_GUEST_MODE;
+
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ smp_mb__after_srcu_read_unlock();
+
+ /*
+ * We might have got VCPU interrupts updated asynchronously
+ * so update it in HW.
+ */
+ kvm_riscv_vcpu_flush_interrupts(vcpu);
+
+ /* Update HVIP CSR for current CPU */
+ kvm_riscv_update_hvip(vcpu);
+
+ if (ret <= 0 ||
+ kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
+ kvm_request_pending(vcpu)) {
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ local_irq_enable();
+ preempt_enable();
+ kvm_vcpu_srcu_read_lock(vcpu);
+ continue;
+ }
+
+ /*
+ * Cleanup stale TLB enteries
+ *
+ * Note: This should be done after G-stage VMID has been
+ * updated using kvm_riscv_gstage_vmid_ver_changed()
+ */
+ kvm_riscv_local_tlb_sanitize(vcpu);
+
+ guest_timing_enter_irqoff();
+
+ kvm_riscv_vcpu_enter_exit(vcpu);
+
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ vcpu->stat.exits++;
+
+ /*
+ * Save SCAUSE, STVAL, HTVAL, and HTINST because we might
+ * get an interrupt between __kvm_riscv_switch_to() and
+ * local_irq_enable() which can potentially change CSRs.
+ */
+ trap.sepc = vcpu->arch.guest_context.sepc;
+ trap.scause = csr_read(CSR_SCAUSE);
+ trap.stval = csr_read(CSR_STVAL);
+ trap.htval = csr_read(CSR_HTVAL);
+ trap.htinst = csr_read(CSR_HTINST);
+
+ /* Syncup interrupts state with HW */
+ kvm_riscv_vcpu_sync_interrupts(vcpu);
+
+ /*
+ * We must ensure that any pending interrupts are taken before
+ * we exit guest timing so that timer ticks are accounted as
+ * guest time. Transiently unmask interrupts so that any
+ * pending interrupts are taken.
+ *
+ * There's no barrier which ensures that pending interrupts are
+ * recognised, so we just hope that the CPU takes any pending
+ * interrupts between the enable and disable.
+ */
+ local_irq_enable();
+ local_irq_disable();
+
+ guest_timing_exit_irqoff();
+
+ local_irq_enable();
+
+ preempt_enable();
+
+ kvm_vcpu_srcu_read_lock(vcpu);
+
+ ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
+ }
+
+ kvm_sigset_deactivate(vcpu);
+
+ vcpu_put(vcpu);
+
+ kvm_vcpu_srcu_read_unlock(vcpu);
+
+ return ret;
+}
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
new file mode 100644
index 000000000000..dbb09afd7546
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -0,0 +1,711 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+
+#define INSN_OPCODE_MASK 0x007c
+#define INSN_OPCODE_SHIFT 2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_WFI 0xffffffff
+#define INSN_MATCH_WFI 0x10500073
+
+#define INSN_MATCH_LB 0x3
+#define INSN_MASK_LB 0x707f
+#define INSN_MATCH_LH 0x1003
+#define INSN_MASK_LH 0x707f
+#define INSN_MATCH_LW 0x2003
+#define INSN_MASK_LW 0x707f
+#define INSN_MATCH_LD 0x3003
+#define INSN_MASK_LD 0x707f
+#define INSN_MATCH_LBU 0x4003
+#define INSN_MASK_LBU 0x707f
+#define INSN_MATCH_LHU 0x5003
+#define INSN_MASK_LHU 0x707f
+#define INSN_MATCH_LWU 0x6003
+#define INSN_MASK_LWU 0x707f
+#define INSN_MATCH_SB 0x23
+#define INSN_MASK_SB 0x707f
+#define INSN_MATCH_SH 0x1023
+#define INSN_MASK_SH 0x707f
+#define INSN_MATCH_SW 0x2023
+#define INSN_MASK_SW 0x707f
+#define INSN_MATCH_SD 0x3023
+#define INSN_MASK_SD 0x707f
+
+#define INSN_MATCH_C_LD 0x6000
+#define INSN_MASK_C_LD 0xe003
+#define INSN_MATCH_C_SD 0xe000
+#define INSN_MASK_C_SD 0xe003
+#define INSN_MATCH_C_LW 0x4000
+#define INSN_MASK_C_LW 0xe003
+#define INSN_MATCH_C_SW 0xc000
+#define INSN_MASK_C_SW 0xe003
+#define INSN_MATCH_C_LDSP 0x6002
+#define INSN_MASK_C_LDSP 0xe003
+#define INSN_MATCH_C_SDSP 0xe002
+#define INSN_MASK_C_SDSP 0xe003
+#define INSN_MATCH_C_LWSP 0x4002
+#define INSN_MASK_C_LWSP 0xe003
+#define INSN_MATCH_C_SWSP 0xc002
+#define INSN_MASK_C_SWSP 0xe003
+
+#define INSN_16BIT_MASK 0x3
+
+#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK)
+
+#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4)
+
+#ifdef CONFIG_64BIT
+#define LOG_REGBYTES 3
+#else
+#define LOG_REGBYTES 2
+#endif
+#define REGBYTES (1 << LOG_REGBYTES)
+
+#define SH_RD 7
+#define SH_RS1 15
+#define SH_RS2 20
+#define SH_RS2C 2
+
+#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
+#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \
+ (RV_X(x, 10, 3) << 3) | \
+ (RV_X(x, 5, 1) << 6))
+#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \
+ (RV_X(x, 5, 2) << 6))
+#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \
+ (RV_X(x, 12, 1) << 5) | \
+ (RV_X(x, 2, 2) << 6))
+#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \
+ (RV_X(x, 12, 1) << 5) | \
+ (RV_X(x, 2, 3) << 6))
+#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \
+ (RV_X(x, 7, 2) << 6))
+#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \
+ (RV_X(x, 7, 3) << 6))
+#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3))
+#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3))
+#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5)
+
+#define SHIFT_RIGHT(x, y) \
+ ((y) < 0 ? ((x) << -(y)) : ((x) >> (y)))
+
+#define REG_MASK \
+ ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES))
+
+#define REG_OFFSET(insn, pos) \
+ (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK)
+
+#define REG_PTR(insn, pos, regs) \
+ ((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)))
+
+#define GET_RM(insn) (((insn) >> 12) & 7)
+
+#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
+#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
+#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
+#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs))
+#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs))
+#define GET_SP(regs) (*REG_PTR(2, 0, regs))
+#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val))
+#define IMM_I(insn) ((s32)(insn) >> 20)
+#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \
+ (s32)(((insn) >> 7) & 0x1f))
+#define MASK_FUNCT3 0x7000
+
+static int truly_illegal_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+ struct kvm_cpu_trap utrap = { 0 };
+
+ /* Redirect trap to Guest VCPU */
+ utrap.sepc = vcpu->arch.guest_context.sepc;
+ utrap.scause = EXC_INST_ILLEGAL;
+ utrap.stval = insn;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+
+ return 1;
+}
+
+static int system_opcode_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+ if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) {
+ vcpu->stat.wfi_exit_stat++;
+ kvm_riscv_vcpu_wfi(vcpu);
+ vcpu->arch.guest_context.sepc += INSN_LEN(insn);
+ return 1;
+ }
+
+ return truly_illegal_insn(vcpu, run, insn);
+}
+
+static int virtual_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap)
+{
+ unsigned long insn = trap->stval;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *ct;
+
+ if (unlikely(INSN_IS_16BIT(insn))) {
+ if (insn == 0) {
+ ct = &vcpu->arch.guest_context;
+ insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
+ ct->sepc,
+ &utrap);
+ if (utrap.scause) {
+ utrap.sepc = ct->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ return 1;
+ }
+ }
+ if (INSN_IS_16BIT(insn))
+ return truly_illegal_insn(vcpu, run, insn);
+ }
+
+ switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
+ case INSN_OPCODE_SYSTEM:
+ return system_opcode_insn(vcpu, run, insn);
+ default:
+ return truly_illegal_insn(vcpu, run, insn);
+ }
+}
+
+static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long fault_addr, unsigned long htinst)
+{
+ u8 data_buf[8];
+ unsigned long insn;
+ int shift = 0, len = 0, insn_len = 0;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
+
+ /* Determine trapped instruction */
+ if (htinst & 0x1) {
+ /*
+ * Bit[0] == 1 implies trapped instruction value is
+ * transformed instruction or custom instruction.
+ */
+ insn = htinst | INSN_16BIT_MASK;
+ insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
+ } else {
+ /*
+ * Bit[0] == 0 implies trapped instruction value is
+ * zero or special value.
+ */
+ insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
+ &utrap);
+ if (utrap.scause) {
+ /* Redirect trap if we failed to read instruction */
+ utrap.sepc = ct->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ return 1;
+ }
+ insn_len = INSN_LEN(insn);
+ }
+
+ /* Decode length of MMIO and shift */
+ if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
+ len = 4;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) {
+ len = 1;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
+ len = 1;
+ shift = 8 * (sizeof(ulong) - len);
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) {
+ len = 8;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) {
+ len = 4;
+#endif
+ } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) {
+ len = 2;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) {
+ len = 2;
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) {
+ len = 8;
+ shift = 8 * (sizeof(ulong) - len);
+ insn = RVC_RS2S(insn) << SH_RD;
+ } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 8;
+ shift = 8 * (sizeof(ulong) - len);
+#endif
+ } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) {
+ len = 4;
+ shift = 8 * (sizeof(ulong) - len);
+ insn = RVC_RS2S(insn) << SH_RD;
+ } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 4;
+ shift = 8 * (sizeof(ulong) - len);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ /* Fault address should be aligned to length of MMIO */
+ if (fault_addr & (len - 1))
+ return -EIO;
+
+ /* Save instruction decode info */
+ vcpu->arch.mmio_decode.insn = insn;
+ vcpu->arch.mmio_decode.insn_len = insn_len;
+ vcpu->arch.mmio_decode.shift = shift;
+ vcpu->arch.mmio_decode.len = len;
+ vcpu->arch.mmio_decode.return_handled = 0;
+
+ /* Update MMIO details in kvm_run struct */
+ run->mmio.is_write = false;
+ run->mmio.phys_addr = fault_addr;
+ run->mmio.len = len;
+
+ /* Try to handle MMIO access in the kernel */
+ if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) {
+ /* Successfully handled MMIO access in the kernel so resume */
+ memcpy(run->mmio.data, data_buf, len);
+ vcpu->stat.mmio_exit_kernel++;
+ kvm_riscv_vcpu_mmio_return(vcpu, run);
+ return 1;
+ }
+
+ /* Exit to userspace for MMIO emulation */
+ vcpu->stat.mmio_exit_user++;
+ run->exit_reason = KVM_EXIT_MMIO;
+
+ return 0;
+}
+
+static int emulate_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long fault_addr, unsigned long htinst)
+{
+ u8 data8;
+ u16 data16;
+ u32 data32;
+ u64 data64;
+ ulong data;
+ unsigned long insn;
+ int len = 0, insn_len = 0;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
+
+ /* Determine trapped instruction */
+ if (htinst & 0x1) {
+ /*
+ * Bit[0] == 1 implies trapped instruction value is
+ * transformed instruction or custom instruction.
+ */
+ insn = htinst | INSN_16BIT_MASK;
+ insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
+ } else {
+ /*
+ * Bit[0] == 0 implies trapped instruction value is
+ * zero or special value.
+ */
+ insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
+ &utrap);
+ if (utrap.scause) {
+ /* Redirect trap if we failed to read instruction */
+ utrap.sepc = ct->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ return 1;
+ }
+ insn_len = INSN_LEN(insn);
+ }
+
+ data = GET_RS2(insn, &vcpu->arch.guest_context);
+ data8 = data16 = data32 = data64 = data;
+
+ if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) {
+ len = 4;
+ } else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) {
+ len = 1;
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
+ len = 8;
+#endif
+ } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
+ len = 2;
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
+ len = 8;
+ data64 = GET_RS2S(insn, &vcpu->arch.guest_context);
+ } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 8;
+ data64 = GET_RS2C(insn, &vcpu->arch.guest_context);
+#endif
+ } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
+ len = 4;
+ data32 = GET_RS2S(insn, &vcpu->arch.guest_context);
+ } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 4;
+ data32 = GET_RS2C(insn, &vcpu->arch.guest_context);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ /* Fault address should be aligned to length of MMIO */
+ if (fault_addr & (len - 1))
+ return -EIO;
+
+ /* Save instruction decode info */
+ vcpu->arch.mmio_decode.insn = insn;
+ vcpu->arch.mmio_decode.insn_len = insn_len;
+ vcpu->arch.mmio_decode.shift = 0;
+ vcpu->arch.mmio_decode.len = len;
+ vcpu->arch.mmio_decode.return_handled = 0;
+
+ /* Copy data to kvm_run instance */
+ switch (len) {
+ case 1:
+ *((u8 *)run->mmio.data) = data8;
+ break;
+ case 2:
+ *((u16 *)run->mmio.data) = data16;
+ break;
+ case 4:
+ *((u32 *)run->mmio.data) = data32;
+ break;
+ case 8:
+ *((u64 *)run->mmio.data) = data64;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* Update MMIO details in kvm_run struct */
+ run->mmio.is_write = true;
+ run->mmio.phys_addr = fault_addr;
+ run->mmio.len = len;
+
+ /* Try to handle MMIO access in the kernel */
+ if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS,
+ fault_addr, len, run->mmio.data)) {
+ /* Successfully handled MMIO access in the kernel so resume */
+ vcpu->stat.mmio_exit_kernel++;
+ kvm_riscv_vcpu_mmio_return(vcpu, run);
+ return 1;
+ }
+
+ /* Exit to userspace for MMIO emulation */
+ vcpu->stat.mmio_exit_user++;
+ run->exit_reason = KVM_EXIT_MMIO;
+
+ return 0;
+}
+
+static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap)
+{
+ struct kvm_memory_slot *memslot;
+ unsigned long hva, fault_addr;
+ bool writeable;
+ gfn_t gfn;
+ int ret;
+
+ fault_addr = (trap->htval << 2) | (trap->stval & 0x3);
+ gfn = fault_addr >> PAGE_SHIFT;
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable);
+
+ if (kvm_is_error_hva(hva) ||
+ (trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writeable)) {
+ switch (trap->scause) {
+ case EXC_LOAD_GUEST_PAGE_FAULT:
+ return emulate_load(vcpu, run, fault_addr,
+ trap->htinst);
+ case EXC_STORE_GUEST_PAGE_FAULT:
+ return emulate_store(vcpu, run, fault_addr,
+ trap->htinst);
+ default:
+ return -EOPNOTSUPP;
+ };
+ }
+
+ ret = kvm_riscv_gstage_map(vcpu, memslot, fault_addr, hva,
+ (trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false);
+ if (ret < 0)
+ return ret;
+
+ return 1;
+}
+
+/**
+ * kvm_riscv_vcpu_wfi -- Emulate wait for interrupt (WFI) behaviour
+ *
+ * @vcpu: The VCPU pointer
+ */
+void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_arch_vcpu_runnable(vcpu)) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ kvm_vcpu_halt(vcpu);
+ kvm_vcpu_srcu_read_lock(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ }
+}
+
+/**
+ * kvm_riscv_vcpu_unpriv_read -- Read machine word from Guest memory
+ *
+ * @vcpu: The VCPU pointer
+ * @read_insn: Flag representing whether we are reading instruction
+ * @guest_addr: Guest address to read
+ * @trap: Output pointer to trap details
+ */
+unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
+ bool read_insn,
+ unsigned long guest_addr,
+ struct kvm_cpu_trap *trap)
+{
+ register unsigned long taddr asm("a0") = (unsigned long)trap;
+ register unsigned long ttmp asm("a1");
+ register unsigned long val asm("t0");
+ register unsigned long tmp asm("t1");
+ register unsigned long addr asm("t2") = guest_addr;
+ unsigned long flags;
+ unsigned long old_stvec, old_hstatus;
+
+ local_irq_save(flags);
+
+ old_hstatus = csr_swap(CSR_HSTATUS, vcpu->arch.guest_context.hstatus);
+ old_stvec = csr_swap(CSR_STVEC, (ulong)&__kvm_riscv_unpriv_trap);
+
+ if (read_insn) {
+ /*
+ * HLVX.HU instruction
+ * 0110010 00011 rs1 100 rd 1110011
+ */
+ asm volatile ("\n"
+ ".option push\n"
+ ".option norvc\n"
+ "add %[ttmp], %[taddr], 0\n"
+ /*
+ * HLVX.HU %[val], (%[addr])
+ * HLVX.HU t0, (t2)
+ * 0110010 00011 00111 100 00101 1110011
+ */
+ ".word 0x6433c2f3\n"
+ "andi %[tmp], %[val], 3\n"
+ "addi %[tmp], %[tmp], -3\n"
+ "bne %[tmp], zero, 2f\n"
+ "addi %[addr], %[addr], 2\n"
+ /*
+ * HLVX.HU %[tmp], (%[addr])
+ * HLVX.HU t1, (t2)
+ * 0110010 00011 00111 100 00110 1110011
+ */
+ ".word 0x6433c373\n"
+ "sll %[tmp], %[tmp], 16\n"
+ "add %[val], %[val], %[tmp]\n"
+ "2:\n"
+ ".option pop"
+ : [val] "=&r" (val), [tmp] "=&r" (tmp),
+ [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp),
+ [addr] "+&r" (addr) : : "memory");
+
+ if (trap->scause == EXC_LOAD_PAGE_FAULT)
+ trap->scause = EXC_INST_PAGE_FAULT;
+ } else {
+ /*
+ * HLV.D instruction
+ * 0110110 00000 rs1 100 rd 1110011
+ *
+ * HLV.W instruction
+ * 0110100 00000 rs1 100 rd 1110011
+ */
+ asm volatile ("\n"
+ ".option push\n"
+ ".option norvc\n"
+ "add %[ttmp], %[taddr], 0\n"
+#ifdef CONFIG_64BIT
+ /*
+ * HLV.D %[val], (%[addr])
+ * HLV.D t0, (t2)
+ * 0110110 00000 00111 100 00101 1110011
+ */
+ ".word 0x6c03c2f3\n"
+#else
+ /*
+ * HLV.W %[val], (%[addr])
+ * HLV.W t0, (t2)
+ * 0110100 00000 00111 100 00101 1110011
+ */
+ ".word 0x6803c2f3\n"
+#endif
+ ".option pop"
+ : [val] "=&r" (val),
+ [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp)
+ : [addr] "r" (addr) : "memory");
+ }
+
+ csr_write(CSR_STVEC, old_stvec);
+ csr_write(CSR_HSTATUS, old_hstatus);
+
+ local_irq_restore(flags);
+
+ return val;
+}
+
+/**
+ * kvm_riscv_vcpu_trap_redirect -- Redirect trap to Guest
+ *
+ * @vcpu: The VCPU pointer
+ * @trap: Trap details
+ */
+void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_trap *trap)
+{
+ unsigned long vsstatus = csr_read(CSR_VSSTATUS);
+
+ /* Change Guest SSTATUS.SPP bit */
+ vsstatus &= ~SR_SPP;
+ if (vcpu->arch.guest_context.sstatus & SR_SPP)
+ vsstatus |= SR_SPP;
+
+ /* Change Guest SSTATUS.SPIE bit */
+ vsstatus &= ~SR_SPIE;
+ if (vsstatus & SR_SIE)
+ vsstatus |= SR_SPIE;
+
+ /* Clear Guest SSTATUS.SIE bit */
+ vsstatus &= ~SR_SIE;
+
+ /* Update Guest SSTATUS */
+ csr_write(CSR_VSSTATUS, vsstatus);
+
+ /* Update Guest SCAUSE, STVAL, and SEPC */
+ csr_write(CSR_VSCAUSE, trap->scause);
+ csr_write(CSR_VSTVAL, trap->stval);
+ csr_write(CSR_VSEPC, trap->sepc);
+
+ /* Set Guest PC to Guest exception vector */
+ vcpu->arch.guest_context.sepc = csr_read(CSR_VSTVEC);
+}
+
+/**
+ * kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation
+ * or in-kernel IO emulation
+ *
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the mmio data
+ */
+int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u8 data8;
+ u16 data16;
+ u32 data32;
+ u64 data64;
+ ulong insn;
+ int len, shift;
+
+ if (vcpu->arch.mmio_decode.return_handled)
+ return 0;
+
+ vcpu->arch.mmio_decode.return_handled = 1;
+ insn = vcpu->arch.mmio_decode.insn;
+
+ if (run->mmio.is_write)
+ goto done;
+
+ len = vcpu->arch.mmio_decode.len;
+ shift = vcpu->arch.mmio_decode.shift;
+
+ switch (len) {
+ case 1:
+ data8 = *((u8 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data8 << shift >> shift);
+ break;
+ case 2:
+ data16 = *((u16 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data16 << shift >> shift);
+ break;
+ case 4:
+ data32 = *((u32 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data32 << shift >> shift);
+ break;
+ case 8:
+ data64 = *((u64 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data64 << shift >> shift);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+done:
+ /* Move to next instruction */
+ vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len;
+
+ return 0;
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap)
+{
+ int ret;
+
+ /* If we got host interrupt then do nothing */
+ if (trap->scause & CAUSE_IRQ_FLAG)
+ return 1;
+
+ /* Handle guest traps */
+ ret = -EFAULT;
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ switch (trap->scause) {
+ case EXC_VIRTUAL_INST_FAULT:
+ if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+ ret = virtual_inst_fault(vcpu, run, trap);
+ break;
+ case EXC_INST_GUEST_PAGE_FAULT:
+ case EXC_LOAD_GUEST_PAGE_FAULT:
+ case EXC_STORE_GUEST_PAGE_FAULT:
+ if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+ ret = gstage_page_fault(vcpu, run, trap);
+ break;
+ case EXC_SUPERVISOR_SYSCALL:
+ if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+ ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
+ break;
+ default:
+ break;
+ }
+
+ /* Print details in-case of error */
+ if (ret < 0) {
+ kvm_err("VCPU exit error %d\n", ret);
+ kvm_err("SEPC=0x%lx SSTATUS=0x%lx HSTATUS=0x%lx\n",
+ vcpu->arch.guest_context.sepc,
+ vcpu->arch.guest_context.sstatus,
+ vcpu->arch.guest_context.hstatus);
+ kvm_err("SCAUSE=0x%lx STVAL=0x%lx HTVAL=0x%lx HTINST=0x%lx\n",
+ trap->scause, trap->stval, trap->htval, trap->htinst);
+ }
+
+ return ret;
+}
diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c
new file mode 100644
index 000000000000..d4308c512007
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_fp.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/hwcap.h>
+
+#ifdef CONFIG_FPU
+void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
+{
+ unsigned long isa = vcpu->arch.isa;
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+ cntx->sstatus &= ~SR_FS;
+ if (riscv_isa_extension_available(&isa, f) ||
+ riscv_isa_extension_available(&isa, d))
+ cntx->sstatus |= SR_FS_INITIAL;
+ else
+ cntx->sstatus |= SR_FS_OFF;
+}
+
+static void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
+{
+ cntx->sstatus &= ~SR_FS;
+ cntx->sstatus |= SR_FS_CLEAN;
+}
+
+void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+ if ((cntx->sstatus & SR_FS) == SR_FS_DIRTY) {
+ if (riscv_isa_extension_available(&isa, d))
+ __kvm_riscv_fp_d_save(cntx);
+ else if (riscv_isa_extension_available(&isa, f))
+ __kvm_riscv_fp_f_save(cntx);
+ kvm_riscv_vcpu_fp_clean(cntx);
+ }
+}
+
+void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+ if ((cntx->sstatus & SR_FS) != SR_FS_OFF) {
+ if (riscv_isa_extension_available(&isa, d))
+ __kvm_riscv_fp_d_restore(cntx);
+ else if (riscv_isa_extension_available(&isa, f))
+ __kvm_riscv_fp_f_restore(cntx);
+ kvm_riscv_vcpu_fp_clean(cntx);
+ }
+}
+
+void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
+{
+ /* No need to check host sstatus as it can be modified outside */
+ if (riscv_isa_extension_available(NULL, d))
+ __kvm_riscv_fp_d_save(cntx);
+ else if (riscv_isa_extension_available(NULL, f))
+ __kvm_riscv_fp_f_save(cntx);
+}
+
+void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx)
+{
+ if (riscv_isa_extension_available(NULL, d))
+ __kvm_riscv_fp_d_restore(cntx);
+ else if (riscv_isa_extension_available(NULL, f))
+ __kvm_riscv_fp_f_restore(cntx);
+}
+#endif
+
+int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long isa = vcpu->arch.isa;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ rtype);
+ void *reg_val;
+
+ if ((rtype == KVM_REG_RISCV_FP_F) &&
+ riscv_isa_extension_available(&isa, f)) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+ return -EINVAL;
+ if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+ reg_val = &cntx->fp.f.fcsr;
+ else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+ reg_val = &cntx->fp.f.f[reg_num];
+ else
+ return -EINVAL;
+ } else if ((rtype == KVM_REG_RISCV_FP_D) &&
+ riscv_isa_extension_available(&isa, d)) {
+ if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+ return -EINVAL;
+ reg_val = &cntx->fp.d.fcsr;
+ } else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+ return -EINVAL;
+ reg_val = &cntx->fp.d.f[reg_num];
+ } else
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long isa = vcpu->arch.isa;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ rtype);
+ void *reg_val;
+
+ if ((rtype == KVM_REG_RISCV_FP_F) &&
+ riscv_isa_extension_available(&isa, f)) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+ return -EINVAL;
+ if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+ reg_val = &cntx->fp.f.fcsr;
+ else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+ reg_val = &cntx->fp.f.f[reg_num];
+ else
+ return -EINVAL;
+ } else if ((rtype == KVM_REG_RISCV_FP_D) &&
+ riscv_isa_extension_available(&isa, d)) {
+ if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+ return -EINVAL;
+ reg_val = &cntx->fp.d.fcsr;
+ } else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+ return -EINVAL;
+ reg_val = &cntx->fp.d.f[reg_num];
+ } else
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
new file mode 100644
index 000000000000..d45e7da3f0d3
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_linux_err_map_sbi(int err)
+{
+ switch (err) {
+ case 0:
+ return SBI_SUCCESS;
+ case -EPERM:
+ return SBI_ERR_DENIED;
+ case -EINVAL:
+ return SBI_ERR_INVALID_PARAM;
+ case -EFAULT:
+ return SBI_ERR_INVALID_ADDRESS;
+ case -EOPNOTSUPP:
+ return SBI_ERR_NOT_SUPPORTED;
+ case -EALREADY:
+ return SBI_ERR_ALREADY_AVAILABLE;
+ default:
+ return SBI_ERR_FAILURE;
+ };
+}
+
+#ifdef CONFIG_RISCV_SBI_V01
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
+#else
+static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+ .extid_start = -1UL,
+ .extid_end = -1UL,
+ .handler = NULL,
+};
+#endif
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
+
+static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
+ &vcpu_sbi_ext_v01,
+ &vcpu_sbi_ext_base,
+ &vcpu_sbi_ext_time,
+ &vcpu_sbi_ext_ipi,
+ &vcpu_sbi_ext_rfence,
+ &vcpu_sbi_ext_srst,
+ &vcpu_sbi_ext_hsm,
+ &vcpu_sbi_ext_experimental,
+ &vcpu_sbi_ext_vendor,
+};
+
+void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+ vcpu->arch.sbi_context.return_handled = 0;
+ vcpu->stat.ecall_exit_stat++;
+ run->exit_reason = KVM_EXIT_RISCV_SBI;
+ run->riscv_sbi.extension_id = cp->a7;
+ run->riscv_sbi.function_id = cp->a6;
+ run->riscv_sbi.args[0] = cp->a0;
+ run->riscv_sbi.args[1] = cp->a1;
+ run->riscv_sbi.args[2] = cp->a2;
+ run->riscv_sbi.args[3] = cp->a3;
+ run->riscv_sbi.args[4] = cp->a4;
+ run->riscv_sbi.args[5] = cp->a5;
+ run->riscv_sbi.ret[0] = cp->a0;
+ run->riscv_sbi.ret[1] = cp->a1;
+}
+
+void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ u32 type, u64 reason)
+{
+ unsigned long i;
+ struct kvm_vcpu *tmp;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+ tmp->arch.power_off = true;
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+ memset(&run->system_event, 0, sizeof(run->system_event));
+ run->system_event.type = type;
+ run->system_event.ndata = 1;
+ run->system_event.data[0] = reason;
+ run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+ /* Handle SBI return only once */
+ if (vcpu->arch.sbi_context.return_handled)
+ return 0;
+ vcpu->arch.sbi_context.return_handled = 1;
+
+ /* Update return values */
+ cp->a0 = run->riscv_sbi.ret[0];
+ cp->a1 = run->riscv_sbi.ret[1];
+
+ /* Move to next instruction */
+ vcpu->arch.guest_context.sepc += 4;
+
+ return 0;
+}
+
+const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid)
+{
+ int i = 0;
+
+ for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
+ if (sbi_ext[i]->extid_start <= extid &&
+ sbi_ext[i]->extid_end >= extid)
+ return sbi_ext[i];
+ }
+
+ return NULL;
+}
+
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int ret = 1;
+ bool next_sepc = true;
+ bool userspace_exit = false;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ const struct kvm_vcpu_sbi_extension *sbi_ext;
+ struct kvm_cpu_trap utrap = { 0 };
+ unsigned long out_val = 0;
+ bool ext_is_v01 = false;
+
+ sbi_ext = kvm_vcpu_sbi_find_ext(cp->a7);
+ if (sbi_ext && sbi_ext->handler) {
+#ifdef CONFIG_RISCV_SBI_V01
+ if (cp->a7 >= SBI_EXT_0_1_SET_TIMER &&
+ cp->a7 <= SBI_EXT_0_1_SHUTDOWN)
+ ext_is_v01 = true;
+#endif
+ ret = sbi_ext->handler(vcpu, run, &out_val, &utrap, &userspace_exit);
+ } else {
+ /* Return error for unsupported SBI calls */
+ cp->a0 = SBI_ERR_NOT_SUPPORTED;
+ goto ecall_done;
+ }
+
+ /* Handle special error cases i.e trap, exit or userspace forward */
+ if (utrap.scause) {
+ /* No need to increment sepc or exit ioctl loop */
+ ret = 1;
+ utrap.sepc = cp->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ next_sepc = false;
+ goto ecall_done;
+ }
+
+ /* Exit ioctl loop or Propagate the error code the guest */
+ if (userspace_exit) {
+ next_sepc = false;
+ ret = 0;
+ } else {
+ /**
+ * SBI extension handler always returns an Linux error code. Convert
+ * it to the SBI specific error code that can be propagated the SBI
+ * caller.
+ */
+ ret = kvm_linux_err_map_sbi(ret);
+ cp->a0 = ret;
+ ret = 1;
+ }
+ecall_done:
+ if (next_sepc)
+ cp->sepc += 4;
+ if (!ext_is_v01)
+ cp->a1 = out_val;
+
+ return ret;
+}
diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c
new file mode 100644
index 000000000000..48f431091cdb
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_base.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/version.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *trap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct sbiret ecall_ret;
+
+ switch (cp->a6) {
+ case SBI_EXT_BASE_GET_SPEC_VERSION:
+ *out_val = (KVM_SBI_VERSION_MAJOR <<
+ SBI_SPEC_VERSION_MAJOR_SHIFT) |
+ KVM_SBI_VERSION_MINOR;
+ break;
+ case SBI_EXT_BASE_GET_IMP_ID:
+ *out_val = KVM_SBI_IMPID;
+ break;
+ case SBI_EXT_BASE_GET_IMP_VERSION:
+ *out_val = LINUX_VERSION_CODE;
+ break;
+ case SBI_EXT_BASE_PROBE_EXT:
+ if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
+ cp->a0 <= SBI_EXT_EXPERIMENTAL_END) ||
+ (cp->a0 >= SBI_EXT_VENDOR_START &&
+ cp->a0 <= SBI_EXT_VENDOR_END)) {
+ /*
+ * For experimental/vendor extensions
+ * forward it to the userspace
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ } else
+ *out_val = kvm_vcpu_sbi_find_ext(cp->a0) ? 1 : 0;
+ break;
+ case SBI_EXT_BASE_GET_MVENDORID:
+ case SBI_EXT_BASE_GET_MARCHID:
+ case SBI_EXT_BASE_GET_MIMPID:
+ ecall_ret = sbi_ecall(SBI_EXT_BASE, cp->a6, 0, 0, 0, 0, 0, 0);
+ if (!ecall_ret.error)
+ *out_val = ecall_ret.value;
+ /*TODO: We are unnecessarily converting the error twice */
+ ret = sbi_err_map_linux_errno(ecall_ret.error);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base = {
+ .extid_start = SBI_EXT_BASE,
+ .extid_end = SBI_EXT_BASE,
+ .handler = kvm_sbi_ext_base_handler,
+};
+
+static int kvm_sbi_ext_forward_handler(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ /*
+ * Both SBI experimental and vendor extensions are
+ * unconditionally forwarded to userspace.
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ return 0;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = {
+ .extid_start = SBI_EXT_EXPERIMENTAL_START,
+ .extid_end = SBI_EXT_EXPERIMENTAL_END,
+ .handler = kvm_sbi_ext_forward_handler,
+};
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = {
+ .extid_start = SBI_EXT_VENDOR_START,
+ .extid_end = SBI_EXT_VENDOR_END,
+ .handler = kvm_sbi_ext_forward_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
new file mode 100644
index 000000000000..239dec0a628a
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *reset_cntx;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct kvm_vcpu *target_vcpu;
+ unsigned long target_vcpuid = cp->a0;
+
+ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+ if (!target_vcpu)
+ return -EINVAL;
+ if (!target_vcpu->arch.power_off)
+ return -EALREADY;
+
+ reset_cntx = &target_vcpu->arch.guest_reset_context;
+ /* start address */
+ reset_cntx->sepc = cp->a1;
+ /* target vcpu id to start */
+ reset_cntx->a0 = target_vcpuid;
+ /* private data passed from kernel */
+ reset_cntx->a1 = cp->a2;
+ kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
+
+ kvm_riscv_vcpu_power_on(target_vcpu);
+
+ return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.power_off)
+ return -EINVAL;
+
+ kvm_riscv_vcpu_power_off(vcpu);
+
+ return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long target_vcpuid = cp->a0;
+ struct kvm_vcpu *target_vcpu;
+
+ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+ if (!target_vcpu)
+ return -EINVAL;
+ if (!target_vcpu->arch.power_off)
+ return SBI_HSM_STATE_STARTED;
+ else if (vcpu->stat.generic.blocking)
+ return SBI_HSM_STATE_SUSPENDED;
+ else
+ return SBI_HSM_STATE_STOPPED;
+}
+
+static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long funcid = cp->a6;
+
+ switch (funcid) {
+ case SBI_EXT_HSM_HART_START:
+ mutex_lock(&kvm->lock);
+ ret = kvm_sbi_hsm_vcpu_start(vcpu);
+ mutex_unlock(&kvm->lock);
+ break;
+ case SBI_EXT_HSM_HART_STOP:
+ ret = kvm_sbi_hsm_vcpu_stop(vcpu);
+ break;
+ case SBI_EXT_HSM_HART_STATUS:
+ ret = kvm_sbi_hsm_vcpu_get_status(vcpu);
+ if (ret >= 0) {
+ *out_val = ret;
+ ret = 0;
+ }
+ break;
+ case SBI_EXT_HSM_HART_SUSPEND:
+ switch (cp->a0) {
+ case SBI_HSM_SUSPEND_RET_DEFAULT:
+ kvm_riscv_vcpu_wfi(vcpu);
+ break;
+ case SBI_HSM_SUSPEND_NON_RET_DEFAULT:
+ ret = -EOPNOTSUPP;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm = {
+ .extid_start = SBI_EXT_HSM,
+ .extid_end = SBI_EXT_HSM,
+ .handler = kvm_sbi_ext_hsm_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
new file mode 100644
index 000000000000..4c034d8a606a
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ u64 next_cycle;
+
+ if (cp->a6 != SBI_EXT_TIME_SET_TIMER)
+ return -EINVAL;
+
+#if __riscv_xlen == 32
+ next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+ next_cycle = (u64)cp->a0;
+#endif
+ kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time = {
+ .extid_start = SBI_EXT_TIME,
+ .extid_end = SBI_EXT_TIME,
+ .handler = kvm_sbi_ext_time_handler,
+};
+
+static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ unsigned long i;
+ struct kvm_vcpu *tmp;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long hmask = cp->a0;
+ unsigned long hbase = cp->a1;
+
+ if (cp->a6 != SBI_EXT_IPI_SEND_IPI)
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ if (hbase != -1UL) {
+ if (tmp->vcpu_id < hbase)
+ continue;
+ if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+ continue;
+ }
+ ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi = {
+ .extid_start = SBI_EXT_IPI,
+ .extid_end = SBI_EXT_IPI,
+ .handler = kvm_sbi_ext_ipi_handler,
+};
+
+static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long hmask = cp->a0;
+ unsigned long hbase = cp->a1;
+ unsigned long funcid = cp->a6;
+
+ switch (funcid) {
+ case SBI_EXT_RFENCE_REMOTE_FENCE_I:
+ kvm_riscv_fence_i(vcpu->kvm, hbase, hmask);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
+ if (cp->a2 == 0 && cp->a3 == 0)
+ kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask);
+ else
+ kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
+ cp->a2, cp->a3, PAGE_SHIFT);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
+ if (cp->a2 == 0 && cp->a3 == 0)
+ kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
+ hbase, hmask, cp->a4);
+ else
+ kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm,
+ hbase, hmask,
+ cp->a2, cp->a3,
+ PAGE_SHIFT, cp->a4);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
+ /*
+ * Until nested virtualization is implemented, the
+ * SBI HFENCE calls should be treated as NOPs
+ */
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence = {
+ .extid_start = SBI_EXT_RFENCE,
+ .extid_end = SBI_EXT_RFENCE,
+ .handler = kvm_sbi_ext_rfence_handler,
+};
+
+static int kvm_sbi_ext_srst_handler(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long funcid = cp->a6;
+ u32 reason = cp->a1;
+ u32 type = cp->a0;
+ int ret = 0;
+
+ switch (funcid) {
+ case SBI_EXT_SRST_RESET:
+ switch (type) {
+ case SBI_SRST_RESET_TYPE_SHUTDOWN:
+ kvm_riscv_vcpu_sbi_system_reset(vcpu, run,
+ KVM_SYSTEM_EVENT_SHUTDOWN,
+ reason);
+ *exit = true;
+ break;
+ case SBI_SRST_RESET_TYPE_COLD_REBOOT:
+ case SBI_SRST_RESET_TYPE_WARM_REBOOT:
+ kvm_riscv_vcpu_sbi_system_reset(vcpu, run,
+ KVM_SYSTEM_EVENT_RESET,
+ reason);
+ *exit = true;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst = {
+ .extid_start = SBI_EXT_SRST,
+ .extid_end = SBI_EXT_SRST,
+ .handler = kvm_sbi_ext_srst_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
new file mode 100644
index 000000000000..8a91a14e7139
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ ulong hmask;
+ int i, ret = 0;
+ u64 next_cycle;
+ struct kvm_vcpu *rvcpu;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+ switch (cp->a7) {
+ case SBI_EXT_0_1_CONSOLE_GETCHAR:
+ case SBI_EXT_0_1_CONSOLE_PUTCHAR:
+ /*
+ * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
+ * handled in kernel so we forward these to user-space
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ break;
+ case SBI_EXT_0_1_SET_TIMER:
+#if __riscv_xlen == 32
+ next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+ next_cycle = (u64)cp->a0;
+#endif
+ ret = kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+ break;
+ case SBI_EXT_0_1_CLEAR_IPI:
+ ret = kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
+ break;
+ case SBI_EXT_0_1_SEND_IPI:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap->scause)
+ break;
+
+ for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+ rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
+ if (ret < 0)
+ break;
+ }
+ break;
+ case SBI_EXT_0_1_SHUTDOWN:
+ kvm_riscv_vcpu_sbi_system_reset(vcpu, run,
+ KVM_SYSTEM_EVENT_SHUTDOWN, 0);
+ *exit = true;
+ break;
+ case SBI_EXT_0_1_REMOTE_FENCE_I:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap->scause)
+ break;
+
+ if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
+ kvm_riscv_fence_i(vcpu->kvm, 0, hmask);
+ else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA) {
+ if (cp->a1 == 0 && cp->a2 == 0)
+ kvm_riscv_hfence_vvma_all(vcpu->kvm,
+ 0, hmask);
+ else
+ kvm_riscv_hfence_vvma_gva(vcpu->kvm,
+ 0, hmask,
+ cp->a1, cp->a2,
+ PAGE_SHIFT);
+ } else {
+ if (cp->a1 == 0 && cp->a2 == 0)
+ kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
+ 0, hmask,
+ cp->a3);
+ else
+ kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm,
+ 0, hmask,
+ cp->a1, cp->a2,
+ PAGE_SHIFT,
+ cp->a3);
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+ .extid_start = SBI_EXT_0_1_SET_TIMER,
+ .extid_end = SBI_EXT_0_1_SHUTDOWN,
+ .handler = kvm_sbi_ext_v01_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S
new file mode 100644
index 000000000000..d74df8eb4d71
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_switch.S
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/csr.h>
+
+ .text
+ .altmacro
+ .option norelax
+
+ENTRY(__kvm_riscv_switch_to)
+ /* Save Host GPRs (except A0 and T0-T6) */
+ REG_S ra, (KVM_ARCH_HOST_RA)(a0)
+ REG_S sp, (KVM_ARCH_HOST_SP)(a0)
+ REG_S gp, (KVM_ARCH_HOST_GP)(a0)
+ REG_S tp, (KVM_ARCH_HOST_TP)(a0)
+ REG_S s0, (KVM_ARCH_HOST_S0)(a0)
+ REG_S s1, (KVM_ARCH_HOST_S1)(a0)
+ REG_S a1, (KVM_ARCH_HOST_A1)(a0)
+ REG_S a2, (KVM_ARCH_HOST_A2)(a0)
+ REG_S a3, (KVM_ARCH_HOST_A3)(a0)
+ REG_S a4, (KVM_ARCH_HOST_A4)(a0)
+ REG_S a5, (KVM_ARCH_HOST_A5)(a0)
+ REG_S a6, (KVM_ARCH_HOST_A6)(a0)
+ REG_S a7, (KVM_ARCH_HOST_A7)(a0)
+ REG_S s2, (KVM_ARCH_HOST_S2)(a0)
+ REG_S s3, (KVM_ARCH_HOST_S3)(a0)
+ REG_S s4, (KVM_ARCH_HOST_S4)(a0)
+ REG_S s5, (KVM_ARCH_HOST_S5)(a0)
+ REG_S s6, (KVM_ARCH_HOST_S6)(a0)
+ REG_S s7, (KVM_ARCH_HOST_S7)(a0)
+ REG_S s8, (KVM_ARCH_HOST_S8)(a0)
+ REG_S s9, (KVM_ARCH_HOST_S9)(a0)
+ REG_S s10, (KVM_ARCH_HOST_S10)(a0)
+ REG_S s11, (KVM_ARCH_HOST_S11)(a0)
+
+ /* Load Guest CSR values */
+ REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0)
+ REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
+ REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
+ la t4, __kvm_switch_return
+ REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0)
+
+ /* Save Host and Restore Guest SSTATUS */
+ csrrw t0, CSR_SSTATUS, t0
+
+ /* Save Host and Restore Guest HSTATUS */
+ csrrw t1, CSR_HSTATUS, t1
+
+ /* Save Host and Restore Guest SCOUNTEREN */
+ csrrw t2, CSR_SCOUNTEREN, t2
+
+ /* Save Host STVEC and change it to return path */
+ csrrw t4, CSR_STVEC, t4
+
+ /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */
+ csrrw t3, CSR_SSCRATCH, a0
+
+ /* Restore Guest SEPC */
+ csrw CSR_SEPC, t5
+
+ /* Store Host CSR values */
+ REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0)
+ REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0)
+ REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
+ REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0)
+ REG_S t4, (KVM_ARCH_HOST_STVEC)(a0)
+
+ /* Restore Guest GPRs (except A0) */
+ REG_L ra, (KVM_ARCH_GUEST_RA)(a0)
+ REG_L sp, (KVM_ARCH_GUEST_SP)(a0)
+ REG_L gp, (KVM_ARCH_GUEST_GP)(a0)
+ REG_L tp, (KVM_ARCH_GUEST_TP)(a0)
+ REG_L t0, (KVM_ARCH_GUEST_T0)(a0)
+ REG_L t1, (KVM_ARCH_GUEST_T1)(a0)
+ REG_L t2, (KVM_ARCH_GUEST_T2)(a0)
+ REG_L s0, (KVM_ARCH_GUEST_S0)(a0)
+ REG_L s1, (KVM_ARCH_GUEST_S1)(a0)
+ REG_L a1, (KVM_ARCH_GUEST_A1)(a0)
+ REG_L a2, (KVM_ARCH_GUEST_A2)(a0)
+ REG_L a3, (KVM_ARCH_GUEST_A3)(a0)
+ REG_L a4, (KVM_ARCH_GUEST_A4)(a0)
+ REG_L a5, (KVM_ARCH_GUEST_A5)(a0)
+ REG_L a6, (KVM_ARCH_GUEST_A6)(a0)
+ REG_L a7, (KVM_ARCH_GUEST_A7)(a0)
+ REG_L s2, (KVM_ARCH_GUEST_S2)(a0)
+ REG_L s3, (KVM_ARCH_GUEST_S3)(a0)
+ REG_L s4, (KVM_ARCH_GUEST_S4)(a0)
+ REG_L s5, (KVM_ARCH_GUEST_S5)(a0)
+ REG_L s6, (KVM_ARCH_GUEST_S6)(a0)
+ REG_L s7, (KVM_ARCH_GUEST_S7)(a0)
+ REG_L s8, (KVM_ARCH_GUEST_S8)(a0)
+ REG_L s9, (KVM_ARCH_GUEST_S9)(a0)
+ REG_L s10, (KVM_ARCH_GUEST_S10)(a0)
+ REG_L s11, (KVM_ARCH_GUEST_S11)(a0)
+ REG_L t3, (KVM_ARCH_GUEST_T3)(a0)
+ REG_L t4, (KVM_ARCH_GUEST_T4)(a0)
+ REG_L t5, (KVM_ARCH_GUEST_T5)(a0)
+ REG_L t6, (KVM_ARCH_GUEST_T6)(a0)
+
+ /* Restore Guest A0 */
+ REG_L a0, (KVM_ARCH_GUEST_A0)(a0)
+
+ /* Resume Guest */
+ sret
+
+ /* Back to Host */
+ .align 2
+__kvm_switch_return:
+ /* Swap Guest A0 with SSCRATCH */
+ csrrw a0, CSR_SSCRATCH, a0
+
+ /* Save Guest GPRs (except A0) */
+ REG_S ra, (KVM_ARCH_GUEST_RA)(a0)
+ REG_S sp, (KVM_ARCH_GUEST_SP)(a0)
+ REG_S gp, (KVM_ARCH_GUEST_GP)(a0)
+ REG_S tp, (KVM_ARCH_GUEST_TP)(a0)
+ REG_S t0, (KVM_ARCH_GUEST_T0)(a0)
+ REG_S t1, (KVM_ARCH_GUEST_T1)(a0)
+ REG_S t2, (KVM_ARCH_GUEST_T2)(a0)
+ REG_S s0, (KVM_ARCH_GUEST_S0)(a0)
+ REG_S s1, (KVM_ARCH_GUEST_S1)(a0)
+ REG_S a1, (KVM_ARCH_GUEST_A1)(a0)
+ REG_S a2, (KVM_ARCH_GUEST_A2)(a0)
+ REG_S a3, (KVM_ARCH_GUEST_A3)(a0)
+ REG_S a4, (KVM_ARCH_GUEST_A4)(a0)
+ REG_S a5, (KVM_ARCH_GUEST_A5)(a0)
+ REG_S a6, (KVM_ARCH_GUEST_A6)(a0)
+ REG_S a7, (KVM_ARCH_GUEST_A7)(a0)
+ REG_S s2, (KVM_ARCH_GUEST_S2)(a0)
+ REG_S s3, (KVM_ARCH_GUEST_S3)(a0)
+ REG_S s4, (KVM_ARCH_GUEST_S4)(a0)
+ REG_S s5, (KVM_ARCH_GUEST_S5)(a0)
+ REG_S s6, (KVM_ARCH_GUEST_S6)(a0)
+ REG_S s7, (KVM_ARCH_GUEST_S7)(a0)
+ REG_S s8, (KVM_ARCH_GUEST_S8)(a0)
+ REG_S s9, (KVM_ARCH_GUEST_S9)(a0)
+ REG_S s10, (KVM_ARCH_GUEST_S10)(a0)
+ REG_S s11, (KVM_ARCH_GUEST_S11)(a0)
+ REG_S t3, (KVM_ARCH_GUEST_T3)(a0)
+ REG_S t4, (KVM_ARCH_GUEST_T4)(a0)
+ REG_S t5, (KVM_ARCH_GUEST_T5)(a0)
+ REG_S t6, (KVM_ARCH_GUEST_T6)(a0)
+
+ /* Load Host CSR values */
+ REG_L t1, (KVM_ARCH_HOST_STVEC)(a0)
+ REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0)
+ REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
+ REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0)
+ REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0)
+
+ /* Save Guest SEPC */
+ csrr t0, CSR_SEPC
+
+ /* Save Guest A0 and Restore Host SSCRATCH */
+ csrrw t2, CSR_SSCRATCH, t2
+
+ /* Restore Host STVEC */
+ csrw CSR_STVEC, t1
+
+ /* Save Guest and Restore Host SCOUNTEREN */
+ csrrw t3, CSR_SCOUNTEREN, t3
+
+ /* Save Guest and Restore Host HSTATUS */
+ csrrw t4, CSR_HSTATUS, t4
+
+ /* Save Guest and Restore Host SSTATUS */
+ csrrw t5, CSR_SSTATUS, t5
+
+ /* Store Guest CSR values */
+ REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0)
+ REG_S t2, (KVM_ARCH_GUEST_A0)(a0)
+ REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
+ REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0)
+ REG_S t5, (KVM_ARCH_GUEST_SSTATUS)(a0)
+
+ /* Restore Host GPRs (except A0 and T0-T6) */
+ REG_L ra, (KVM_ARCH_HOST_RA)(a0)
+ REG_L sp, (KVM_ARCH_HOST_SP)(a0)
+ REG_L gp, (KVM_ARCH_HOST_GP)(a0)
+ REG_L tp, (KVM_ARCH_HOST_TP)(a0)
+ REG_L s0, (KVM_ARCH_HOST_S0)(a0)
+ REG_L s1, (KVM_ARCH_HOST_S1)(a0)
+ REG_L a1, (KVM_ARCH_HOST_A1)(a0)
+ REG_L a2, (KVM_ARCH_HOST_A2)(a0)
+ REG_L a3, (KVM_ARCH_HOST_A3)(a0)
+ REG_L a4, (KVM_ARCH_HOST_A4)(a0)
+ REG_L a5, (KVM_ARCH_HOST_A5)(a0)
+ REG_L a6, (KVM_ARCH_HOST_A6)(a0)
+ REG_L a7, (KVM_ARCH_HOST_A7)(a0)
+ REG_L s2, (KVM_ARCH_HOST_S2)(a0)
+ REG_L s3, (KVM_ARCH_HOST_S3)(a0)
+ REG_L s4, (KVM_ARCH_HOST_S4)(a0)
+ REG_L s5, (KVM_ARCH_HOST_S5)(a0)
+ REG_L s6, (KVM_ARCH_HOST_S6)(a0)
+ REG_L s7, (KVM_ARCH_HOST_S7)(a0)
+ REG_L s8, (KVM_ARCH_HOST_S8)(a0)
+ REG_L s9, (KVM_ARCH_HOST_S9)(a0)
+ REG_L s10, (KVM_ARCH_HOST_S10)(a0)
+ REG_L s11, (KVM_ARCH_HOST_S11)(a0)
+
+ /* Return to C code */
+ ret
+ENDPROC(__kvm_riscv_switch_to)
+
+ENTRY(__kvm_riscv_unpriv_trap)
+ /*
+ * We assume that faulting unpriv load/store instruction is
+ * 4-byte long and blindly increment SEPC by 4.
+ *
+ * The trap details will be saved at address pointed by 'A0'
+ * register and we use 'A1' register as temporary.
+ */
+ csrr a1, CSR_SEPC
+ REG_S a1, (KVM_ARCH_TRAP_SEPC)(a0)
+ addi a1, a1, 4
+ csrw CSR_SEPC, a1
+ csrr a1, CSR_SCAUSE
+ REG_S a1, (KVM_ARCH_TRAP_SCAUSE)(a0)
+ csrr a1, CSR_STVAL
+ REG_S a1, (KVM_ARCH_TRAP_STVAL)(a0)
+ csrr a1, CSR_HTVAL
+ REG_S a1, (KVM_ARCH_TRAP_HTVAL)(a0)
+ csrr a1, CSR_HTINST
+ REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0)
+ sret
+ENDPROC(__kvm_riscv_unpriv_trap)
+
+#ifdef CONFIG_FPU
+ .align 3
+ .global __kvm_riscv_fp_f_save
+__kvm_riscv_fp_f_save:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ csrs CSR_SSTATUS, t1
+ frcsr t0
+ fsw f0, KVM_ARCH_FP_F_F0(a0)
+ fsw f1, KVM_ARCH_FP_F_F1(a0)
+ fsw f2, KVM_ARCH_FP_F_F2(a0)
+ fsw f3, KVM_ARCH_FP_F_F3(a0)
+ fsw f4, KVM_ARCH_FP_F_F4(a0)
+ fsw f5, KVM_ARCH_FP_F_F5(a0)
+ fsw f6, KVM_ARCH_FP_F_F6(a0)
+ fsw f7, KVM_ARCH_FP_F_F7(a0)
+ fsw f8, KVM_ARCH_FP_F_F8(a0)
+ fsw f9, KVM_ARCH_FP_F_F9(a0)
+ fsw f10, KVM_ARCH_FP_F_F10(a0)
+ fsw f11, KVM_ARCH_FP_F_F11(a0)
+ fsw f12, KVM_ARCH_FP_F_F12(a0)
+ fsw f13, KVM_ARCH_FP_F_F13(a0)
+ fsw f14, KVM_ARCH_FP_F_F14(a0)
+ fsw f15, KVM_ARCH_FP_F_F15(a0)
+ fsw f16, KVM_ARCH_FP_F_F16(a0)
+ fsw f17, KVM_ARCH_FP_F_F17(a0)
+ fsw f18, KVM_ARCH_FP_F_F18(a0)
+ fsw f19, KVM_ARCH_FP_F_F19(a0)
+ fsw f20, KVM_ARCH_FP_F_F20(a0)
+ fsw f21, KVM_ARCH_FP_F_F21(a0)
+ fsw f22, KVM_ARCH_FP_F_F22(a0)
+ fsw f23, KVM_ARCH_FP_F_F23(a0)
+ fsw f24, KVM_ARCH_FP_F_F24(a0)
+ fsw f25, KVM_ARCH_FP_F_F25(a0)
+ fsw f26, KVM_ARCH_FP_F_F26(a0)
+ fsw f27, KVM_ARCH_FP_F_F27(a0)
+ fsw f28, KVM_ARCH_FP_F_F28(a0)
+ fsw f29, KVM_ARCH_FP_F_F29(a0)
+ fsw f30, KVM_ARCH_FP_F_F30(a0)
+ fsw f31, KVM_ARCH_FP_F_F31(a0)
+ sw t0, KVM_ARCH_FP_F_FCSR(a0)
+ csrw CSR_SSTATUS, t2
+ ret
+
+ .align 3
+ .global __kvm_riscv_fp_d_save
+__kvm_riscv_fp_d_save:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ csrs CSR_SSTATUS, t1
+ frcsr t0
+ fsd f0, KVM_ARCH_FP_D_F0(a0)
+ fsd f1, KVM_ARCH_FP_D_F1(a0)
+ fsd f2, KVM_ARCH_FP_D_F2(a0)
+ fsd f3, KVM_ARCH_FP_D_F3(a0)
+ fsd f4, KVM_ARCH_FP_D_F4(a0)
+ fsd f5, KVM_ARCH_FP_D_F5(a0)
+ fsd f6, KVM_ARCH_FP_D_F6(a0)
+ fsd f7, KVM_ARCH_FP_D_F7(a0)
+ fsd f8, KVM_ARCH_FP_D_F8(a0)
+ fsd f9, KVM_ARCH_FP_D_F9(a0)
+ fsd f10, KVM_ARCH_FP_D_F10(a0)
+ fsd f11, KVM_ARCH_FP_D_F11(a0)
+ fsd f12, KVM_ARCH_FP_D_F12(a0)
+ fsd f13, KVM_ARCH_FP_D_F13(a0)
+ fsd f14, KVM_ARCH_FP_D_F14(a0)
+ fsd f15, KVM_ARCH_FP_D_F15(a0)
+ fsd f16, KVM_ARCH_FP_D_F16(a0)
+ fsd f17, KVM_ARCH_FP_D_F17(a0)
+ fsd f18, KVM_ARCH_FP_D_F18(a0)
+ fsd f19, KVM_ARCH_FP_D_F19(a0)
+ fsd f20, KVM_ARCH_FP_D_F20(a0)
+ fsd f21, KVM_ARCH_FP_D_F21(a0)
+ fsd f22, KVM_ARCH_FP_D_F22(a0)
+ fsd f23, KVM_ARCH_FP_D_F23(a0)
+ fsd f24, KVM_ARCH_FP_D_F24(a0)
+ fsd f25, KVM_ARCH_FP_D_F25(a0)
+ fsd f26, KVM_ARCH_FP_D_F26(a0)
+ fsd f27, KVM_ARCH_FP_D_F27(a0)
+ fsd f28, KVM_ARCH_FP_D_F28(a0)
+ fsd f29, KVM_ARCH_FP_D_F29(a0)
+ fsd f30, KVM_ARCH_FP_D_F30(a0)
+ fsd f31, KVM_ARCH_FP_D_F31(a0)
+ sw t0, KVM_ARCH_FP_D_FCSR(a0)
+ csrw CSR_SSTATUS, t2
+ ret
+
+ .align 3
+ .global __kvm_riscv_fp_f_restore
+__kvm_riscv_fp_f_restore:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ lw t0, KVM_ARCH_FP_F_FCSR(a0)
+ csrs CSR_SSTATUS, t1
+ flw f0, KVM_ARCH_FP_F_F0(a0)
+ flw f1, KVM_ARCH_FP_F_F1(a0)
+ flw f2, KVM_ARCH_FP_F_F2(a0)
+ flw f3, KVM_ARCH_FP_F_F3(a0)
+ flw f4, KVM_ARCH_FP_F_F4(a0)
+ flw f5, KVM_ARCH_FP_F_F5(a0)
+ flw f6, KVM_ARCH_FP_F_F6(a0)
+ flw f7, KVM_ARCH_FP_F_F7(a0)
+ flw f8, KVM_ARCH_FP_F_F8(a0)
+ flw f9, KVM_ARCH_FP_F_F9(a0)
+ flw f10, KVM_ARCH_FP_F_F10(a0)
+ flw f11, KVM_ARCH_FP_F_F11(a0)
+ flw f12, KVM_ARCH_FP_F_F12(a0)
+ flw f13, KVM_ARCH_FP_F_F13(a0)
+ flw f14, KVM_ARCH_FP_F_F14(a0)
+ flw f15, KVM_ARCH_FP_F_F15(a0)
+ flw f16, KVM_ARCH_FP_F_F16(a0)
+ flw f17, KVM_ARCH_FP_F_F17(a0)
+ flw f18, KVM_ARCH_FP_F_F18(a0)
+ flw f19, KVM_ARCH_FP_F_F19(a0)
+ flw f20, KVM_ARCH_FP_F_F20(a0)
+ flw f21, KVM_ARCH_FP_F_F21(a0)
+ flw f22, KVM_ARCH_FP_F_F22(a0)
+ flw f23, KVM_ARCH_FP_F_F23(a0)
+ flw f24, KVM_ARCH_FP_F_F24(a0)
+ flw f25, KVM_ARCH_FP_F_F25(a0)
+ flw f26, KVM_ARCH_FP_F_F26(a0)
+ flw f27, KVM_ARCH_FP_F_F27(a0)
+ flw f28, KVM_ARCH_FP_F_F28(a0)
+ flw f29, KVM_ARCH_FP_F_F29(a0)
+ flw f30, KVM_ARCH_FP_F_F30(a0)
+ flw f31, KVM_ARCH_FP_F_F31(a0)
+ fscsr t0
+ csrw CSR_SSTATUS, t2
+ ret
+
+ .align 3
+ .global __kvm_riscv_fp_d_restore
+__kvm_riscv_fp_d_restore:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ lw t0, KVM_ARCH_FP_D_FCSR(a0)
+ csrs CSR_SSTATUS, t1
+ fld f0, KVM_ARCH_FP_D_F0(a0)
+ fld f1, KVM_ARCH_FP_D_F1(a0)
+ fld f2, KVM_ARCH_FP_D_F2(a0)
+ fld f3, KVM_ARCH_FP_D_F3(a0)
+ fld f4, KVM_ARCH_FP_D_F4(a0)
+ fld f5, KVM_ARCH_FP_D_F5(a0)
+ fld f6, KVM_ARCH_FP_D_F6(a0)
+ fld f7, KVM_ARCH_FP_D_F7(a0)
+ fld f8, KVM_ARCH_FP_D_F8(a0)
+ fld f9, KVM_ARCH_FP_D_F9(a0)
+ fld f10, KVM_ARCH_FP_D_F10(a0)
+ fld f11, KVM_ARCH_FP_D_F11(a0)
+ fld f12, KVM_ARCH_FP_D_F12(a0)
+ fld f13, KVM_ARCH_FP_D_F13(a0)
+ fld f14, KVM_ARCH_FP_D_F14(a0)
+ fld f15, KVM_ARCH_FP_D_F15(a0)
+ fld f16, KVM_ARCH_FP_D_F16(a0)
+ fld f17, KVM_ARCH_FP_D_F17(a0)
+ fld f18, KVM_ARCH_FP_D_F18(a0)
+ fld f19, KVM_ARCH_FP_D_F19(a0)
+ fld f20, KVM_ARCH_FP_D_F20(a0)
+ fld f21, KVM_ARCH_FP_D_F21(a0)
+ fld f22, KVM_ARCH_FP_D_F22(a0)
+ fld f23, KVM_ARCH_FP_D_F23(a0)
+ fld f24, KVM_ARCH_FP_D_F24(a0)
+ fld f25, KVM_ARCH_FP_D_F25(a0)
+ fld f26, KVM_ARCH_FP_D_F26(a0)
+ fld f27, KVM_ARCH_FP_D_F27(a0)
+ fld f28, KVM_ARCH_FP_D_F28(a0)
+ fld f29, KVM_ARCH_FP_D_F29(a0)
+ fld f30, KVM_ARCH_FP_D_F30(a0)
+ fld f31, KVM_ARCH_FP_D_F31(a0)
+ fscsr t0
+ csrw CSR_SSTATUS, t2
+ ret
+#endif
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
new file mode 100644
index 000000000000..5c4c37ff2d48
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <clocksource/timer-riscv.h>
+#include <asm/csr.h>
+#include <asm/delay.h>
+#include <asm/kvm_vcpu_timer.h>
+
+static u64 kvm_riscv_current_cycles(struct kvm_guest_timer *gt)
+{
+ return get_cycles64() + gt->time_delta;
+}
+
+static u64 kvm_riscv_delta_cycles2ns(u64 cycles,
+ struct kvm_guest_timer *gt,
+ struct kvm_vcpu_timer *t)
+{
+ unsigned long flags;
+ u64 cycles_now, cycles_delta, delta_ns;
+
+ local_irq_save(flags);
+ cycles_now = kvm_riscv_current_cycles(gt);
+ if (cycles_now < cycles)
+ cycles_delta = cycles - cycles_now;
+ else
+ cycles_delta = 0;
+ delta_ns = (cycles_delta * gt->nsec_mult) >> gt->nsec_shift;
+ local_irq_restore(flags);
+
+ return delta_ns;
+}
+
+static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h)
+{
+ u64 delta_ns;
+ struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt);
+ struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer);
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+
+ if (kvm_riscv_current_cycles(gt) < t->next_cycles) {
+ delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
+ hrtimer_forward_now(&t->hrt, ktime_set(0, delta_ns));
+ return HRTIMER_RESTART;
+ }
+
+ t->next_set = false;
+ kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_TIMER);
+
+ return HRTIMER_NORESTART;
+}
+
+static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t)
+{
+ if (!t->init_done || !t->next_set)
+ return -EINVAL;
+
+ hrtimer_cancel(&t->hrt);
+ t->next_set = false;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+ u64 delta_ns;
+
+ if (!t->init_done)
+ return -EINVAL;
+
+ kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_TIMER);
+
+ delta_ns = kvm_riscv_delta_cycles2ns(ncycles, gt, t);
+ t->next_cycles = ncycles;
+ hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
+ t->next_set = true;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+ u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_TIMER);
+ u64 reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
+ return -EINVAL;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_TIMER_REG(frequency):
+ reg_val = riscv_timebase;
+ break;
+ case KVM_REG_RISCV_TIMER_REG(time):
+ reg_val = kvm_riscv_current_cycles(gt);
+ break;
+ case KVM_REG_RISCV_TIMER_REG(compare):
+ reg_val = t->next_cycles;
+ break;
+ case KVM_REG_RISCV_TIMER_REG(state):
+ reg_val = (t->next_set) ? KVM_RISCV_TIMER_STATE_ON :
+ KVM_RISCV_TIMER_STATE_OFF;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+ u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_TIMER);
+ u64 reg_val;
+ int ret = 0;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_TIMER_REG(frequency):
+ ret = -EOPNOTSUPP;
+ break;
+ case KVM_REG_RISCV_TIMER_REG(time):
+ gt->time_delta = reg_val - get_cycles64();
+ break;
+ case KVM_REG_RISCV_TIMER_REG(compare):
+ t->next_cycles = reg_val;
+ break;
+ case KVM_REG_RISCV_TIMER_REG(state):
+ if (reg_val == KVM_RISCV_TIMER_STATE_ON)
+ ret = kvm_riscv_vcpu_timer_next_event(vcpu, reg_val);
+ else
+ ret = kvm_riscv_vcpu_timer_cancel(t);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ if (t->init_done)
+ return -EINVAL;
+
+ hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ t->hrt.function = kvm_riscv_vcpu_hrtimer_expired;
+ t->init_done = true;
+ t->next_set = false;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ ret = kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
+ vcpu->arch.timer.init_done = false;
+
+ return ret;
+}
+
+int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu)
+{
+ return kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
+}
+
+void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+
+#ifdef CONFIG_64BIT
+ csr_write(CSR_HTIMEDELTA, gt->time_delta);
+#else
+ csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta));
+ csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32));
+#endif
+}
+
+int kvm_riscv_guest_timer_init(struct kvm *kvm)
+{
+ struct kvm_guest_timer *gt = &kvm->arch.timer;
+
+ riscv_cs_get_mult_shift(&gt->nsec_mult, &gt->nsec_shift);
+ gt->time_delta = -get_cycles64();
+
+ return 0;
+}
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
new file mode 100644
index 000000000000..945a2bf5e3f6
--- /dev/null
+++ b/arch/riscv/kvm/vm.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/kvm_host.h>
+
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS()
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+ sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
+
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+ int r;
+
+ r = kvm_riscv_gstage_alloc_pgd(kvm);
+ if (r)
+ return r;
+
+ r = kvm_riscv_gstage_vmid_init(kvm);
+ if (r) {
+ kvm_riscv_gstage_free_pgd(kvm);
+ return r;
+ }
+
+ return kvm_riscv_guest_timer_init(kvm);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ kvm_destroy_vcpus(kvm);
+}
+
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_USER_MEMORY:
+ case KVM_CAP_SYNC_MMU:
+ case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+ case KVM_CAP_ONE_REG:
+ case KVM_CAP_READONLY_MEM:
+ case KVM_CAP_MP_STATE:
+ case KVM_CAP_IMMEDIATE_EXIT:
+ r = 1;
+ break;
+ case KVM_CAP_NR_VCPUS:
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
+ break;
+ case KVM_CAP_MAX_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
+ case KVM_CAP_VM_GPA_BITS:
+ r = kvm_riscv_gstage_gpa_bits();
+ break;
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
new file mode 100644
index 000000000000..6cd93995fb65
--- /dev/null
+++ b/arch/riscv/kvm/vmid.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+
+static unsigned long vmid_version = 1;
+static unsigned long vmid_next;
+static unsigned long vmid_bits;
+static DEFINE_SPINLOCK(vmid_lock);
+
+void kvm_riscv_gstage_vmid_detect(void)
+{
+ unsigned long old;
+
+ /* Figure-out number of VMID bits in HW */
+ old = csr_read(CSR_HGATP);
+ csr_write(CSR_HGATP, old | HGATP_VMID_MASK);
+ vmid_bits = csr_read(CSR_HGATP);
+ vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT;
+ vmid_bits = fls_long(vmid_bits);
+ csr_write(CSR_HGATP, old);
+
+ /* We polluted local TLB so flush all guest TLB */
+ kvm_riscv_local_hfence_gvma_all();
+
+ /* We don't use VMID bits if they are not sufficient */
+ if ((1UL << vmid_bits) < num_possible_cpus())
+ vmid_bits = 0;
+}
+
+unsigned long kvm_riscv_gstage_vmid_bits(void)
+{
+ return vmid_bits;
+}
+
+int kvm_riscv_gstage_vmid_init(struct kvm *kvm)
+{
+ /* Mark the initial VMID and VMID version invalid */
+ kvm->arch.vmid.vmid_version = 0;
+ kvm->arch.vmid.vmid = 0;
+
+ return 0;
+}
+
+bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid)
+{
+ if (!vmid_bits)
+ return false;
+
+ return unlikely(READ_ONCE(vmid->vmid_version) !=
+ READ_ONCE(vmid_version));
+}
+
+static void __local_hfence_gvma_all(void *info)
+{
+ kvm_riscv_local_hfence_gvma_all();
+}
+
+void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
+{
+ unsigned long i;
+ struct kvm_vcpu *v;
+ struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
+
+ if (!kvm_riscv_gstage_vmid_ver_changed(vmid))
+ return;
+
+ spin_lock(&vmid_lock);
+
+ /*
+ * We need to re-check the vmid_version here to ensure that if
+ * another vcpu already allocated a valid vmid for this vm.
+ */
+ if (!kvm_riscv_gstage_vmid_ver_changed(vmid)) {
+ spin_unlock(&vmid_lock);
+ return;
+ }
+
+ /* First user of a new VMID version? */
+ if (unlikely(vmid_next == 0)) {
+ WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1);
+ vmid_next = 1;
+
+ /*
+ * We ran out of VMIDs so we increment vmid_version and
+ * start assigning VMIDs from 1.
+ *
+ * This also means existing VMIDs assignment to all Guest
+ * instances is invalid and we have force VMID re-assignement
+ * for all Guest instances. The Guest instances that were not
+ * running will automatically pick-up new VMIDs because will
+ * call kvm_riscv_gstage_vmid_update() whenever they enter
+ * in-kernel run loop. For Guest instances that are already
+ * running, we force VM exits on all host CPUs using IPI and
+ * flush all Guest TLBs.
+ */
+ on_each_cpu_mask(cpu_online_mask, __local_hfence_gvma_all,
+ NULL, 1);
+ }
+
+ vmid->vmid = vmid_next;
+ vmid_next++;
+ vmid_next &= (1 << vmid_bits) - 1;
+
+ WRITE_ONCE(vmid->vmid_version, READ_ONCE(vmid_version));
+
+ spin_unlock(&vmid_lock);
+
+ /* Request G-stage page table update for all VCPUs */
+ kvm_for_each_vcpu(i, v, vcpu->kvm)
+ kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
+}
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 0d0db80800c4..25d5c9664e57 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -2,5 +2,8 @@
lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
-lib-y += uaccess.o
+lib-y += memmove.o
+lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c
index f51c9a03bca1..49d510ba75fd 100644
--- a/arch/riscv/lib/delay.c
+++ b/arch/riscv/lib/delay.c
@@ -4,10 +4,14 @@
*/
#include <linux/delay.h>
+#include <linux/math.h>
#include <linux/param.h>
#include <linux/timex.h>
+#include <linux/types.h>
#include <linux/export.h>
+#include <asm/processor.h>
+
/*
* This is copies from arch/arm/include/asm/delay.h
*
diff --git a/arch/riscv/lib/error-inject.c b/arch/riscv/lib/error-inject.c
new file mode 100644
index 000000000000..d667ade2bc41
--- /dev/null
+++ b/arch/riscv/lib/error-inject.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ instruction_pointer_set(regs, regs->ra);
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
new file mode 100644
index 000000000000..e0609e1f0864
--- /dev/null
+++ b/arch/riscv/lib/memmove.S
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Michael T. Kloos <michael@michaelkloos.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+SYM_FUNC_START(__memmove)
+SYM_FUNC_START_WEAK(memmove)
+ /*
+ * Returns
+ * a0 - dest
+ *
+ * Parameters
+ * a0 - Inclusive first byte of dest
+ * a1 - Inclusive first byte of src
+ * a2 - Length of copy n
+ *
+ * Because the return matches the parameter register a0,
+ * we will not clobber or modify that register.
+ *
+ * Note: This currently only works on little-endian.
+ * To port to big-endian, reverse the direction of shifts
+ * in the 2 misaligned fixup copy loops.
+ */
+
+ /* Return if nothing to do */
+ beq a0, a1, return_from_memmove
+ beqz a2, return_from_memmove
+
+ /*
+ * Register Uses
+ * Forward Copy: a1 - Index counter of src
+ * Reverse Copy: a4 - Index counter of src
+ * Forward Copy: t3 - Index counter of dest
+ * Reverse Copy: t4 - Index counter of dest
+ * Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest
+ * Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest
+ * Both Copy Modes: t0 - Link / Temporary for load-store
+ * Both Copy Modes: t1 - Temporary for load-store
+ * Both Copy Modes: t2 - Temporary for load-store
+ * Both Copy Modes: a5 - dest to src alignment offset
+ * Both Copy Modes: a6 - Shift ammount
+ * Both Copy Modes: a7 - Inverse Shift ammount
+ * Both Copy Modes: a2 - Alternate breakpoint for unrolled loops
+ */
+
+ /*
+ * Solve for some register values now.
+ * Byte copy does not need t5 or t6.
+ */
+ mv t3, a0
+ add t4, a0, a2
+ add a4, a1, a2
+
+ /*
+ * Byte copy if copying less than (2 * SZREG) bytes. This can
+ * cause problems with the bulk copy implementation and is
+ * small enough not to bother.
+ */
+ andi t0, a2, -(2 * SZREG)
+ beqz t0, byte_copy
+
+ /*
+ * Now solve for t5 and t6.
+ */
+ andi t5, t3, -SZREG
+ andi t6, t4, -SZREG
+ /*
+ * If dest(Register t3) rounded down to the nearest naturally
+ * aligned SZREG address, does not equal dest, then add SZREG
+ * to find the low-bound of SZREG alignment in the dest memory
+ * region. Note that this could overshoot the dest memory
+ * region if n is less than SZREG. This is one reason why
+ * we always byte copy if n is less than SZREG.
+ * Otherwise, dest is already naturally aligned to SZREG.
+ */
+ beq t5, t3, 1f
+ addi t5, t5, SZREG
+ 1:
+
+ /*
+ * If the dest and src are co-aligned to SZREG, then there is
+ * no need for the full rigmarole of a full misaligned fixup copy.
+ * Instead, do a simpler co-aligned copy.
+ */
+ xor t0, a0, a1
+ andi t1, t0, (SZREG - 1)
+ beqz t1, coaligned_copy
+ /* Fall through to misaligned fixup copy */
+
+misaligned_fixup_copy:
+ bltu a1, a0, misaligned_fixup_copy_reverse
+
+misaligned_fixup_copy_forward:
+ jal t0, byte_copy_until_aligned_forward
+
+ andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
+ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
+ sub a5, a1, t3 /* Find the difference between src and dest */
+ andi a1, a1, -SZREG /* Align the src pointer */
+ addi a2, t6, SZREG /* The other breakpoint for the unrolled loop*/
+
+ /*
+ * Compute The Inverse Shift
+ * a7 = XLEN - a6 = XLEN + -a6
+ * 2s complement negation to find the negative: -a6 = ~a6 + 1
+ * Add that to XLEN. XLEN = SZREG * 8.
+ */
+ not a7, a6
+ addi a7, a7, (SZREG * 8 + 1)
+
+ /*
+ * Fix Misalignment Copy Loop - Forward
+ * load_val0 = load_ptr[0];
+ * do {
+ * load_val1 = load_ptr[1];
+ * store_ptr += 2;
+ * store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7});
+ *
+ * if (store_ptr == {a2})
+ * break;
+ *
+ * load_val0 = load_ptr[2];
+ * load_ptr += 2;
+ * store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7});
+ *
+ * } while (store_ptr != store_ptr_end);
+ * store_ptr = store_ptr_end;
+ */
+
+ REG_L t0, (0 * SZREG)(a1)
+ 1:
+ REG_L t1, (1 * SZREG)(a1)
+ addi t3, t3, (2 * SZREG)
+ srl t0, t0, a6
+ sll t2, t1, a7
+ or t2, t0, t2
+ REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3)
+
+ beq t3, a2, 2f
+
+ REG_L t0, (2 * SZREG)(a1)
+ addi a1, a1, (2 * SZREG)
+ srl t1, t1, a6
+ sll t2, t0, a7
+ or t2, t1, t2
+ REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3)
+
+ bne t3, t6, 1b
+ 2:
+ mv t3, t6 /* Fix the dest pointer in case the loop was broken */
+
+ add a1, t3, a5 /* Restore the src pointer */
+ j byte_copy_forward /* Copy any remaining bytes */
+
+misaligned_fixup_copy_reverse:
+ jal t0, byte_copy_until_aligned_reverse
+
+ andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
+ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
+ sub a5, a4, t4 /* Find the difference between src and dest */
+ andi a4, a4, -SZREG /* Align the src pointer */
+ addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop*/
+
+ /*
+ * Compute The Inverse Shift
+ * a7 = XLEN - a6 = XLEN + -a6
+ * 2s complement negation to find the negative: -a6 = ~a6 + 1
+ * Add that to XLEN. XLEN = SZREG * 8.
+ */
+ not a7, a6
+ addi a7, a7, (SZREG * 8 + 1)
+
+ /*
+ * Fix Misalignment Copy Loop - Reverse
+ * load_val1 = load_ptr[0];
+ * do {
+ * load_val0 = load_ptr[-1];
+ * store_ptr -= 2;
+ * store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7});
+ *
+ * if (store_ptr == {a2})
+ * break;
+ *
+ * load_val1 = load_ptr[-2];
+ * load_ptr -= 2;
+ * store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7});
+ *
+ * } while (store_ptr != store_ptr_end);
+ * store_ptr = store_ptr_end;
+ */
+
+ REG_L t1, ( 0 * SZREG)(a4)
+ 1:
+ REG_L t0, (-1 * SZREG)(a4)
+ addi t4, t4, (-2 * SZREG)
+ sll t1, t1, a7
+ srl t2, t0, a6
+ or t2, t1, t2
+ REG_S t2, ( 1 * SZREG)(t4)
+
+ beq t4, a2, 2f
+
+ REG_L t1, (-2 * SZREG)(a4)
+ addi a4, a4, (-2 * SZREG)
+ sll t0, t0, a7
+ srl t2, t1, a6
+ or t2, t0, t2
+ REG_S t2, ( 0 * SZREG)(t4)
+
+ bne t4, t5, 1b
+ 2:
+ mv t4, t5 /* Fix the dest pointer in case the loop was broken */
+
+ add a4, t4, a5 /* Restore the src pointer */
+ j byte_copy_reverse /* Copy any remaining bytes */
+
+/*
+ * Simple copy loops for SZREG co-aligned memory locations.
+ * These also make calls to do byte copies for any unaligned
+ * data at their terminations.
+ */
+coaligned_copy:
+ bltu a1, a0, coaligned_copy_reverse
+
+coaligned_copy_forward:
+ jal t0, byte_copy_until_aligned_forward
+
+ 1:
+ REG_L t1, ( 0 * SZREG)(a1)
+ addi a1, a1, SZREG
+ addi t3, t3, SZREG
+ REG_S t1, (-1 * SZREG)(t3)
+ bne t3, t6, 1b
+
+ j byte_copy_forward /* Copy any remaining bytes */
+
+coaligned_copy_reverse:
+ jal t0, byte_copy_until_aligned_reverse
+
+ 1:
+ REG_L t1, (-1 * SZREG)(a4)
+ addi a4, a4, -SZREG
+ addi t4, t4, -SZREG
+ REG_S t1, ( 0 * SZREG)(t4)
+ bne t4, t5, 1b
+
+ j byte_copy_reverse /* Copy any remaining bytes */
+
+/*
+ * These are basically sub-functions within the function. They
+ * are used to byte copy until the dest pointer is in alignment.
+ * At which point, a bulk copy method can be used by the
+ * calling code. These work on the same registers as the bulk
+ * copy loops. Therefore, the register values can be picked
+ * up from where they were left and we avoid code duplication
+ * without any overhead except the call in and return jumps.
+ */
+byte_copy_until_aligned_forward:
+ beq t3, t5, 2f
+ 1:
+ lb t1, 0(a1)
+ addi a1, a1, 1
+ addi t3, t3, 1
+ sb t1, -1(t3)
+ bne t3, t5, 1b
+ 2:
+ jalr zero, 0x0(t0) /* Return to multibyte copy loop */
+
+byte_copy_until_aligned_reverse:
+ beq t4, t6, 2f
+ 1:
+ lb t1, -1(a4)
+ addi a4, a4, -1
+ addi t4, t4, -1
+ sb t1, 0(t4)
+ bne t4, t6, 1b
+ 2:
+ jalr zero, 0x0(t0) /* Return to multibyte copy loop */
+
+/*
+ * Simple byte copy loops.
+ * These will byte copy until they reach the end of data to copy.
+ * At that point, they will call to return from memmove.
+ */
+byte_copy:
+ bltu a1, a0, byte_copy_reverse
+
+byte_copy_forward:
+ beq t3, t4, 2f
+ 1:
+ lb t1, 0(a1)
+ addi a1, a1, 1
+ addi t3, t3, 1
+ sb t1, -1(t3)
+ bne t3, t4, 1b
+ 2:
+ ret
+
+byte_copy_reverse:
+ beq t4, t3, 2f
+ 1:
+ lb t1, -1(a4)
+ addi a4, a4, -1
+ addi t4, t4, -1
+ sb t1, 0(t4)
+ bne t4, t3, 1b
+ 2:
+
+return_from_memmove:
+ ret
+
+SYM_FUNC_END(memmove)
+SYM_FUNC_END(__memmove)
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index fceaeb18cc64..8c475f4da308 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -1,15 +1,13 @@
#include <linux/linkage.h>
#include <asm-generic/export.h>
#include <asm/asm.h>
+#include <asm/asm-extable.h>
#include <asm/csr.h>
.macro fixup op reg addr lbl
100:
\op \reg, \addr
- .section __ex_table,"a"
- .balign RISCV_SZPTR
- RISCV_PTR 100b, \lbl
- .previous
+ _asm_extable 100b, \lbl
.endm
ENTRY(__asm_copy_to_user)
@@ -19,50 +17,167 @@ ENTRY(__asm_copy_from_user)
li t6, SR_SUM
csrs CSR_STATUS, t6
- add a3, a1, a2
- /* Use word-oriented copy only if low-order bits match */
- andi t0, a0, SZREG-1
- andi t1, a1, SZREG-1
- bne t0, t1, 2f
+ /* Save for return value */
+ mv t5, a2
- addi t0, a1, SZREG-1
- andi t1, a3, ~(SZREG-1)
- andi t0, t0, ~(SZREG-1)
/*
- * a3: terminal address of source region
- * t0: lowest XLEN-aligned address in source
- * t1: highest XLEN-aligned address in source
+ * Register allocation for code below:
+ * a0 - start of uncopied dst
+ * a1 - start of uncopied src
+ * a2 - size
+ * t0 - end of uncopied dst
*/
- bgeu t0, t1, 2f
- bltu a1, t0, 4f
+ add t0, a0, a2
+
+ /*
+ * Use byte copy only if too small.
+ * SZREG holds 4 for RV32 and 8 for RV64
+ */
+ li a3, 9*SZREG /* size must be larger than size in word_copy */
+ bltu a2, a3, .Lbyte_copy_tail
+
+ /*
+ * Copy first bytes until dst is aligned to word boundary.
+ * a0 - start of dst
+ * t1 - start of aligned dst
+ */
+ addi t1, a0, SZREG-1
+ andi t1, t1, ~(SZREG-1)
+ /* dst is already aligned, skip */
+ beq a0, t1, .Lskip_align_dst
1:
- fixup REG_L, t2, (a1), 10f
- fixup REG_S, t2, (a0), 10f
- addi a1, a1, SZREG
- addi a0, a0, SZREG
- bltu a1, t1, 1b
+ /* a5 - one byte for copying data */
+ fixup lb a5, 0(a1), 10f
+ addi a1, a1, 1 /* src */
+ fixup sb a5, 0(a0), 10f
+ addi a0, a0, 1 /* dst */
+ bltu a0, t1, 1b /* t1 - start of aligned dst */
+
+.Lskip_align_dst:
+ /*
+ * Now dst is aligned.
+ * Use shift-copy if src is misaligned.
+ * Use word-copy if both src and dst are aligned because
+ * can not use shift-copy which do not require shifting
+ */
+ /* a1 - start of src */
+ andi a3, a1, SZREG-1
+ bnez a3, .Lshift_copy
+
+.Lword_copy:
+ /*
+ * Both src and dst are aligned, unrolled word copy
+ *
+ * a0 - start of aligned dst
+ * a1 - start of aligned src
+ * t0 - end of aligned dst
+ */
+ addi t0, t0, -(8*SZREG) /* not to over run */
2:
- bltu a1, a3, 5f
+ fixup REG_L a4, 0(a1), 10f
+ fixup REG_L a5, SZREG(a1), 10f
+ fixup REG_L a6, 2*SZREG(a1), 10f
+ fixup REG_L a7, 3*SZREG(a1), 10f
+ fixup REG_L t1, 4*SZREG(a1), 10f
+ fixup REG_L t2, 5*SZREG(a1), 10f
+ fixup REG_L t3, 6*SZREG(a1), 10f
+ fixup REG_L t4, 7*SZREG(a1), 10f
+ fixup REG_S a4, 0(a0), 10f
+ fixup REG_S a5, SZREG(a0), 10f
+ fixup REG_S a6, 2*SZREG(a0), 10f
+ fixup REG_S a7, 3*SZREG(a0), 10f
+ fixup REG_S t1, 4*SZREG(a0), 10f
+ fixup REG_S t2, 5*SZREG(a0), 10f
+ fixup REG_S t3, 6*SZREG(a0), 10f
+ fixup REG_S t4, 7*SZREG(a0), 10f
+ addi a0, a0, 8*SZREG
+ addi a1, a1, 8*SZREG
+ bltu a0, t0, 2b
+
+ addi t0, t0, 8*SZREG /* revert to original value */
+ j .Lbyte_copy_tail
+
+.Lshift_copy:
+
+ /*
+ * Word copy with shifting.
+ * For misaligned copy we still perform aligned word copy, but
+ * we need to use the value fetched from the previous iteration and
+ * do some shifts.
+ * This is safe because reading is less than a word size.
+ *
+ * a0 - start of aligned dst
+ * a1 - start of src
+ * a3 - a1 & mask:(SZREG-1)
+ * t0 - end of uncopied dst
+ * t1 - end of aligned dst
+ */
+ /* calculating aligned word boundary for dst */
+ andi t1, t0, ~(SZREG-1)
+ /* Converting unaligned src to aligned src */
+ andi a1, a1, ~(SZREG-1)
+
+ /*
+ * Calculate shifts
+ * t3 - prev shift
+ * t4 - current shift
+ */
+ slli t3, a3, 3 /* converting bytes in a3 to bits */
+ li a5, SZREG*8
+ sub t4, a5, t3
+
+ /* Load the first word to combine with second word */
+ fixup REG_L a5, 0(a1), 10f
3:
+ /* Main shifting copy
+ *
+ * a0 - start of aligned dst
+ * a1 - start of aligned src
+ * t1 - end of aligned dst
+ */
+
+ /* At least one iteration will be executed */
+ srl a4, a5, t3
+ fixup REG_L a5, SZREG(a1), 10f
+ addi a1, a1, SZREG
+ sll a2, a5, t4
+ or a2, a2, a4
+ fixup REG_S a2, 0(a0), 10f
+ addi a0, a0, SZREG
+ bltu a0, t1, 3b
+
+ /* Revert src to original unaligned value */
+ add a1, a1, a3
+
+.Lbyte_copy_tail:
+ /*
+ * Byte copy anything left.
+ *
+ * a0 - start of remaining dst
+ * a1 - start of remaining src
+ * t0 - end of remaining dst
+ */
+ bgeu a0, t0, .Lout_copy_user /* check if end of copy */
+4:
+ fixup lb a5, 0(a1), 10f
+ addi a1, a1, 1 /* src */
+ fixup sb a5, 0(a0), 10f
+ addi a0, a0, 1 /* dst */
+ bltu a0, t0, 4b /* t0 - end of dst */
+
+.Lout_copy_user:
/* Disable access to user memory */
csrc CSR_STATUS, t6
- li a0, 0
+ li a0, 0
+ ret
+
+ /* Exception fixup code */
+10:
+ /* Disable access to user memory */
+ csrs CSR_STATUS, t6
+ mv a0, t5
ret
-4: /* Edge case: unalignment */
- fixup lbu, t2, (a1), 10f
- fixup sb, t2, (a0), 10f
- addi a1, a1, 1
- addi a0, a0, 1
- bltu a1, t0, 4b
- j 1b
-5: /* Edge case: remainder */
- fixup lbu, t2, (a1), 10f
- fixup sb, t2, (a0), 10f
- addi a1, a1, 1
- addi a0, a0, 1
- bltu a1, a3, 5b
- j 3b
ENDPROC(__asm_copy_to_user)
ENDPROC(__asm_copy_from_user)
EXPORT_SYMBOL(__asm_copy_to_user)
@@ -108,19 +223,12 @@ ENTRY(__clear_user)
addi a0, a0, 1
bltu a0, a3, 5b
j 3b
-ENDPROC(__clear_user)
-EXPORT_SYMBOL(__clear_user)
- .section .fixup,"ax"
- .balign 4
- /* Fixup code for __copy_user(10) and __clear_user(11) */
-10:
- /* Disable access to user memory */
- csrs CSR_STATUS, t6
- mv a0, a2
- ret
+ /* Exception fixup code */
11:
+ /* Disable access to user memory */
csrs CSR_STATUS, t6
mv a0, a1
ret
- .previous
+ENDPROC(__clear_user)
+EXPORT_SYMBOL(__clear_user)
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 363ef01c30b1..ac7a25298a04 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -2,9 +2,12 @@
CFLAGS_init.o := -mcmodel=medany
ifdef CONFIG_FTRACE
-CFLAGS_REMOVE_init.o = -pg
+CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
endif
+KCOV_INSTRUMENT_init.o := n
+
obj-y += init.o
obj-y += extable.o
obj-$(CONFIG_MMU) += fault.o pageattr.o
@@ -21,6 +24,9 @@ obj-$(CONFIG_KASAN) += kasan_init.o
ifdef CONFIG_KASAN
KASAN_SANITIZE_kasan_init.o := n
KASAN_SANITIZE_init.o := n
+ifdef CONFIG_DEBUG_VIRTUAL
+KASAN_SANITIZE_physaddr.o := n
+endif
endif
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 094118663285..6cb7d96ad9c7 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -16,6 +16,8 @@ static void ipi_remote_fence_i(void *info)
void flush_icache_all(void)
{
+ local_flush_icache_all();
+
if (IS_ENABLED(CONFIG_RISCV_SBI))
sbi_remote_fence_i(NULL);
else
@@ -65,10 +67,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
*/
smp_mb();
} else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
- cpumask_t hartid_mask;
-
- riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
- sbi_remote_fence_i(cpumask_bits(&hartid_mask));
+ sbi_remote_fence_i(&others);
} else {
on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
}
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 613ec81a8979..7acbfbd14557 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -2,13 +2,274 @@
/*
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2017 SiFive
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
*/
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/static_key.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
+#ifdef CONFIG_MMU
+
+DEFINE_STATIC_KEY_FALSE(use_asid_allocator);
+
+static unsigned long asid_bits;
+static unsigned long num_asids;
+static unsigned long asid_mask;
+
+static atomic_long_t current_version;
+
+static DEFINE_RAW_SPINLOCK(context_lock);
+static cpumask_t context_tlb_flush_pending;
+static unsigned long *context_asid_map;
+
+static DEFINE_PER_CPU(atomic_long_t, active_context);
+static DEFINE_PER_CPU(unsigned long, reserved_context);
+
+static bool check_update_reserved_context(unsigned long cntx,
+ unsigned long newcntx)
+{
+ int cpu;
+ bool hit = false;
+
+ /*
+ * Iterate over the set of reserved CONTEXT looking for a match.
+ * If we find one, then we can update our mm to use new CONTEXT
+ * (i.e. the same CONTEXT in the current_version) but we can't
+ * exit the loop early, since we need to ensure that all copies
+ * of the old CONTEXT are updated to reflect the mm. Failure to do
+ * so could result in us missing the reserved CONTEXT in a future
+ * version.
+ */
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(reserved_context, cpu) == cntx) {
+ hit = true;
+ per_cpu(reserved_context, cpu) = newcntx;
+ }
+ }
+
+ return hit;
+}
+
+static void __flush_context(void)
+{
+ int i;
+ unsigned long cntx;
+
+ /* Must be called with context_lock held */
+ lockdep_assert_held(&context_lock);
+
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(context_asid_map, 0, num_asids);
+
+ /* Mark already active ASIDs as used */
+ for_each_possible_cpu(i) {
+ cntx = atomic_long_xchg_relaxed(&per_cpu(active_context, i), 0);
+ /*
+ * If this CPU has already been through a rollover, but
+ * hasn't run another task in the meantime, we must preserve
+ * its reserved CONTEXT, as this is the only trace we have of
+ * the process it is still running.
+ */
+ if (cntx == 0)
+ cntx = per_cpu(reserved_context, i);
+
+ __set_bit(cntx & asid_mask, context_asid_map);
+ per_cpu(reserved_context, i) = cntx;
+ }
+
+ /* Mark ASID #0 as used because it is used at boot-time */
+ __set_bit(0, context_asid_map);
+
+ /* Queue a TLB invalidation for each CPU on next context-switch */
+ cpumask_setall(&context_tlb_flush_pending);
+}
+
+static unsigned long __new_context(struct mm_struct *mm)
+{
+ static u32 cur_idx = 1;
+ unsigned long cntx = atomic_long_read(&mm->context.id);
+ unsigned long asid, ver = atomic_long_read(&current_version);
+
+ /* Must be called with context_lock held */
+ lockdep_assert_held(&context_lock);
+
+ if (cntx != 0) {
+ unsigned long newcntx = ver | (cntx & asid_mask);
+
+ /*
+ * If our current CONTEXT was active during a rollover, we
+ * can continue to use it and this was just a false alarm.
+ */
+ if (check_update_reserved_context(cntx, newcntx))
+ return newcntx;
+
+ /*
+ * We had a valid CONTEXT in a previous life, so try to
+ * re-use it if possible.
+ */
+ if (!__test_and_set_bit(cntx & asid_mask, context_asid_map))
+ return newcntx;
+ }
+
+ /*
+ * Allocate a free ASID. If we can't find one then increment
+ * current_version and flush all ASIDs.
+ */
+ asid = find_next_zero_bit(context_asid_map, num_asids, cur_idx);
+ if (asid != num_asids)
+ goto set_asid;
+
+ /* We're out of ASIDs, so increment current_version */
+ ver = atomic_long_add_return_relaxed(num_asids, &current_version);
+
+ /* Flush everything */
+ __flush_context();
+
+ /* We have more ASIDs than CPUs, so this will always succeed */
+ asid = find_next_zero_bit(context_asid_map, num_asids, 1);
+
+set_asid:
+ __set_bit(asid, context_asid_map);
+ cur_idx = asid;
+ return asid | ver;
+}
+
+static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
+{
+ unsigned long flags;
+ bool need_flush_tlb = false;
+ unsigned long cntx, old_active_cntx;
+
+ cntx = atomic_long_read(&mm->context.id);
+
+ /*
+ * If our active_context is non-zero and the context matches the
+ * current_version, then we update the active_context entry with a
+ * relaxed cmpxchg.
+ *
+ * Following is how we handle racing with a concurrent rollover:
+ *
+ * - We get a zero back from the cmpxchg and end up waiting on the
+ * lock. Taking the lock synchronises with the rollover and so
+ * we are forced to see the updated verion.
+ *
+ * - We get a valid context back from the cmpxchg then we continue
+ * using old ASID because __flush_context() would have marked ASID
+ * of active_context as used and next context switch we will
+ * allocate new context.
+ */
+ old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu));
+ if (old_active_cntx &&
+ ((cntx & ~asid_mask) == atomic_long_read(&current_version)) &&
+ atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu),
+ old_active_cntx, cntx))
+ goto switch_mm_fast;
+
+ raw_spin_lock_irqsave(&context_lock, flags);
+
+ /* Check that our ASID belongs to the current_version. */
+ cntx = atomic_long_read(&mm->context.id);
+ if ((cntx & ~asid_mask) != atomic_long_read(&current_version)) {
+ cntx = __new_context(mm);
+ atomic_long_set(&mm->context.id, cntx);
+ }
+
+ if (cpumask_test_and_clear_cpu(cpu, &context_tlb_flush_pending))
+ need_flush_tlb = true;
+
+ atomic_long_set(&per_cpu(active_context, cpu), cntx);
+
+ raw_spin_unlock_irqrestore(&context_lock, flags);
+
+switch_mm_fast:
+ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
+ ((cntx & asid_mask) << SATP_ASID_SHIFT) |
+ satp_mode);
+
+ if (need_flush_tlb)
+ local_flush_tlb_all();
+}
+
+static void set_mm_noasid(struct mm_struct *mm)
+{
+ /* Switch the page table and blindly nuke entire local TLB */
+ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
+ local_flush_tlb_all();
+}
+
+static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+{
+ if (static_branch_unlikely(&use_asid_allocator))
+ set_mm_asid(mm, cpu);
+ else
+ set_mm_noasid(mm);
+}
+
+static int __init asids_init(void)
+{
+ unsigned long old;
+
+ /* Figure-out number of ASID bits in HW */
+ old = csr_read(CSR_SATP);
+ asid_bits = old | (SATP_ASID_MASK << SATP_ASID_SHIFT);
+ csr_write(CSR_SATP, asid_bits);
+ asid_bits = (csr_read(CSR_SATP) >> SATP_ASID_SHIFT) & SATP_ASID_MASK;
+ asid_bits = fls_long(asid_bits);
+ csr_write(CSR_SATP, old);
+
+ /*
+ * In the process of determining number of ASID bits (above)
+ * we polluted the TLB of current HART so let's do TLB flushed
+ * to remove unwanted TLB enteries.
+ */
+ local_flush_tlb_all();
+
+ /* Pre-compute ASID details */
+ if (asid_bits) {
+ num_asids = 1 << asid_bits;
+ asid_mask = num_asids - 1;
+ }
+
+ /*
+ * Use ASID allocator only if number of HW ASIDs are
+ * at-least twice more than CPUs
+ */
+ if (num_asids > (2 * num_possible_cpus())) {
+ atomic_long_set(&current_version, num_asids);
+
+ context_asid_map = bitmap_zalloc(num_asids, GFP_KERNEL);
+ if (!context_asid_map)
+ panic("Failed to allocate bitmap for %lu ASIDs\n",
+ num_asids);
+
+ __set_bit(0, context_asid_map);
+
+ static_branch_enable(&use_asid_allocator);
+
+ pr_info("ASID allocator using %lu bits (%lu entries)\n",
+ asid_bits, num_asids);
+ } else {
+ pr_info("ASID allocator disabled (%lu bits)\n", asid_bits);
+ }
+
+ return 0;
+}
+early_initcall(asids_init);
+#else
+static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+{
+ /* Nothing to do here when there is no MMU */
+}
+#endif
+
/*
* When necessary, performs a deferred icache flush for the given MM context,
* on the local CPU. RISC-V has no direct mechanism for instruction cache
@@ -20,11 +281,12 @@
* cache flush to be performed before execution resumes on each hart. This
* actually performs that local instruction cache flush, which implicitly only
* refers to the current hart.
+ *
+ * The "cpu" argument must be the current local CPU number.
*/
-static inline void flush_icache_deferred(struct mm_struct *mm)
+static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu)
{
#ifdef CONFIG_SMP
- unsigned int cpu = smp_processor_id();
cpumask_t *mask = &mm->context.icache_stale_mask;
if (cpumask_test_cpu(cpu, mask)) {
@@ -58,10 +320,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
-#ifdef CONFIG_MMU
- csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
- local_flush_tlb_all();
-#endif
+ set_mm(next, cpu);
- flush_icache_deferred(next);
+ flush_icache_deferred(next, cpu);
}
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 2fc729422151..35484d830fd6 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -7,18 +7,65 @@
*/
+#include <linux/bitfield.h>
#include <linux/extable.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <asm/asm-extable.h>
+#include <asm/ptrace.h>
-int fixup_exception(struct pt_regs *regs)
+static inline unsigned long
+get_ex_fixup(const struct exception_table_entry *ex)
{
- const struct exception_table_entry *fixup;
+ return ((unsigned long)&ex->fixup + ex->fixup);
+}
- fixup = search_exception_tables(regs->epc);
- if (fixup) {
- regs->epc = fixup->fixup;
- return 1;
+static bool ex_handler_fixup(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ regs->epc = get_ex_fixup(ex);
+ return true;
+}
+
+static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
+ unsigned long val)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return;
+
+ if (offset)
+ *(unsigned long *)((unsigned long)regs + offset) = val;
+}
+
+static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+ int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data);
+
+ regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT);
+ regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0);
+
+ regs->epc = get_ex_fixup(ex);
+ return true;
+}
+
+bool fixup_exception(struct pt_regs *regs)
+{
+ const struct exception_table_entry *ex;
+
+ ex = search_exception_tables(regs->epc);
+ if (!ex)
+ return false;
+
+ switch (ex->type) {
+ case EX_TYPE_FIXUP:
+ return ex_handler_fixup(ex, regs);
+ case EX_TYPE_BPF:
+ return ex_handler_bpf(ex, regs);
+ case EX_TYPE_UACCESS_ERR_ZERO:
+ return ex_handler_uaccess_err_zero(ex, regs);
}
- return 0;
+
+ BUG();
}
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index ae7b7fe24658..40694f0cab9e 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -13,13 +13,192 @@
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>
+#include <linux/kprobes.h>
+#include <linux/kfence.h>
-#include <asm/pgalloc.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
#include "../kernel/head.h"
+static void die_kernel_fault(const char *msg, unsigned long addr,
+ struct pt_regs *regs)
+{
+ bust_spinlocks(1);
+
+ pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg,
+ addr);
+
+ bust_spinlocks(0);
+ die(regs, "Oops");
+ make_task_dead(SIGKILL);
+}
+
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
+{
+ const char *msg;
+
+ /* Are we prepared to handle this kernel fault? */
+ if (fixup_exception(regs))
+ return;
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+ if (addr < PAGE_SIZE)
+ msg = "NULL pointer dereference";
+ else {
+ if (kfence_handle_page_fault(addr, regs->cause == EXC_STORE_PAGE_FAULT, regs))
+ return;
+
+ msg = "paging request";
+ }
+
+ die_kernel_fault(msg, addr, regs);
+}
+
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+ if (fault & VM_FAULT_OOM) {
+ /*
+ * We ran out of memory, call the OOM killer, and return the userspace
+ * (which will retry the fault, or kill us if we got oom-killed).
+ */
+ if (!user_mode(regs)) {
+ no_context(regs, addr);
+ return;
+ }
+ pagefault_out_of_memory();
+ return;
+ } else if (fault & VM_FAULT_SIGBUS) {
+ /* Kernel mode? Handle exceptions or die */
+ if (!user_mode(regs)) {
+ no_context(regs, addr);
+ return;
+ }
+ do_trap(regs, SIGBUS, BUS_ADRERR, addr);
+ return;
+ }
+ BUG();
+}
+
+static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
+ /*
+ * Something tried to access memory that isn't in our memory map.
+ * Fix it, but check if it's kernel or user first.
+ */
+ mmap_read_unlock(mm);
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs)) {
+ do_trap(regs, SIGSEGV, code, addr);
+ return;
+ }
+
+ no_context(regs, addr);
+}
+
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+ pgd_t *pgd, *pgd_k;
+ pud_t *pud_k;
+ p4d_t *p4d_k;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+ int index;
+ unsigned long pfn;
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs))
+ return do_trap(regs, SIGSEGV, code, addr);
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "tsk->active_mm->pgd" here.
+ * We might be inside an interrupt in the middle
+ * of a task switch.
+ */
+ index = pgd_index(addr);
+ pfn = csr_read(CSR_SATP) & SATP_PPN;
+ pgd = (pgd_t *)pfn_to_virt(pfn) + index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k)) {
+ no_context(regs, addr);
+ return;
+ }
+ set_pgd(pgd, *pgd_k);
+
+ p4d_k = p4d_offset(pgd_k, addr);
+ if (!p4d_present(*p4d_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ pud_k = pud_offset(p4d_k, addr);
+ if (!pud_present(*pud_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ /*
+ * Since the vmalloc area is global, it is unnecessary
+ * to copy individual PTEs
+ */
+ pmd_k = pmd_offset(pud_k, addr);
+ if (!pmd_present(*pmd_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ /*
+ * Make sure the actual PTE exists as well to
+ * catch kernel vmalloc-area accesses to non-mapped
+ * addresses. If we don't do this, this will just
+ * silently loop forever.
+ */
+ pte_k = pte_offset_kernel(pmd_k, addr);
+ if (!pte_present(*pte_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ /*
+ * The kernel assumes that TLBs don't cache invalid
+ * entries, but in RISC-V, SFENCE.VMA specifies an
+ * ordering constraint, not a cache flush; it is
+ * necessary even after writing invalid entries.
+ */
+ local_flush_tlb_page(addr);
+}
+
+static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
+{
+ switch (cause) {
+ case EXC_INST_PAGE_FAULT:
+ if (!(vma->vm_flags & VM_EXEC)) {
+ return true;
+ }
+ break;
+ case EXC_LOAD_PAGE_FAULT:
+ if (!(vma->vm_flags & VM_READ)) {
+ return true;
+ }
+ break;
+ case EXC_STORE_PAGE_FAULT:
+ if (!(vma->vm_flags & VM_WRITE)) {
+ return true;
+ }
+ break;
+ default:
+ panic("%s: unhandled cause %lu", __func__, cause);
+ }
+ return false;
+}
+
/*
* This routine handles page faults. It determines the address and the
* problem, and then passes it off to one of the appropriate routines.
@@ -40,6 +219,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
tsk = current;
mm = tsk->mm;
+ if (kprobe_page_fault(regs, cause))
+ return;
+
/*
* Fault-in kernel-space virtual memory on-demand.
* The 'reference' page table is init_mm.pgd.
@@ -49,9 +231,24 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
* only copy the information from the master page table,
* nothing more.
*/
- if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
- goto vmalloc_fault;
+ if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) {
+ vmalloc_fault(regs, code, addr);
+ return;
+ }
+#ifdef CONFIG_64BIT
+ /*
+ * Modules in 64bit kernels lie in their own virtual region which is not
+ * in the vmalloc region, but dealing with page faults in this region
+ * or the vmalloc region amounts to doing the same thing: checking that
+ * the mapping exists in init_mm.pgd and updating user page table, so
+ * just use vmalloc_fault.
+ */
+ if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
+ vmalloc_fault(regs, code, addr);
+ return;
+ }
+#endif
/* Enable interrupts if they were enabled in the parent context. */
if (likely(regs->status & SR_PIE))
local_irq_enable();
@@ -60,25 +257,46 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
* If we're in an interrupt, have no user context, or are running
* in an atomic region, then we must not take the fault.
*/
- if (unlikely(faulthandler_disabled() || !mm))
- goto no_context;
+ if (unlikely(faulthandler_disabled() || !mm)) {
+ tsk->thread.bad_cause = cause;
+ no_context(regs, addr);
+ return;
+ }
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ if (!user_mode(regs) && addr < TASK_SIZE &&
+ unlikely(!(regs->status & SR_SUM)))
+ die_kernel_fault("access to user memory without uaccess routines",
+ addr, regs);
+
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+ if (cause == EXC_STORE_PAGE_FAULT)
+ flags |= FAULT_FLAG_WRITE;
+ else if (cause == EXC_INST_PAGE_FAULT)
+ flags |= FAULT_FLAG_INSTRUCTION;
retry:
mmap_read_lock(mm);
vma = find_vma(mm, addr);
- if (unlikely(!vma))
- goto bad_area;
+ if (unlikely(!vma)) {
+ tsk->thread.bad_cause = cause;
+ bad_area(regs, mm, code, addr);
+ return;
+ }
if (likely(vma->vm_start <= addr))
goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
- goto bad_area;
- if (unlikely(expand_stack(vma, addr)))
- goto bad_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ tsk->thread.bad_cause = cause;
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+ if (unlikely(expand_stack(vma, addr))) {
+ tsk->thread.bad_cause = cause;
+ bad_area(regs, mm, code, addr);
+ return;
+ }
/*
* Ok, we have a good vm_area for this memory access, so
@@ -87,22 +305,10 @@ retry:
good_area:
code = SEGV_ACCERR;
- switch (cause) {
- case EXC_INST_PAGE_FAULT:
- if (!(vma->vm_flags & VM_EXEC))
- goto bad_area;
- break;
- case EXC_LOAD_PAGE_FAULT:
- if (!(vma->vm_flags & VM_READ))
- goto bad_area;
- break;
- case EXC_STORE_PAGE_FAULT:
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- flags |= FAULT_FLAG_WRITE;
- break;
- default:
- panic("%s: unhandled cause %lu", __func__, cause);
+ if (unlikely(access_error(cause, vma))) {
+ tsk->thread.bad_cause = cause;
+ bad_area(regs, mm, code, addr);
+ return;
}
/*
@@ -110,7 +316,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, addr, flags);
+ fault = handle_mm_fault(vma, addr, flags, regs);
/*
* If we need to retry but a fatal signal is pending, handle the
@@ -120,158 +326,24 @@ good_area:
if (fault_signal_pending(fault, regs))
return;
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
- }
+ if (unlikely(fault & VM_FAULT_RETRY)) {
+ flags |= FAULT_FLAG_TRIED;
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, addr);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, addr);
- }
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
-
- /*
- * No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
- goto retry;
- }
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ goto retry;
}
mmap_read_unlock(mm);
- return;
- /*
- * Something tried to access memory that isn't in our memory map.
- * Fix it, but check if it's kernel or user first.
- */
-bad_area:
- mmap_read_unlock(mm);
- /* User mode accesses just cause a SIGSEGV */
- if (user_mode(regs)) {
- do_trap(regs, SIGSEGV, code, addr);
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ tsk->thread.bad_cause = cause;
+ mm_fault_error(regs, addr, fault);
return;
}
-
-no_context:
- /* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs))
- return;
-
- /*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
- bust_spinlocks(1);
- pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
- (addr < PAGE_SIZE) ? "NULL pointer dereference" :
- "paging request", addr);
- die(regs, "Oops");
- do_exit(SIGKILL);
-
- /*
- * We ran out of memory, call the OOM killer, and return the userspace
- * (which will retry the fault, or kill us if we got oom-killed).
- */
-out_of_memory:
- mmap_read_unlock(mm);
- if (!user_mode(regs))
- goto no_context;
- pagefault_out_of_memory();
- return;
-
-do_sigbus:
- mmap_read_unlock(mm);
- /* Kernel mode? Handle exceptions or die */
- if (!user_mode(regs))
- goto no_context;
- do_trap(regs, SIGBUS, BUS_ADRERR, addr);
return;
-
-vmalloc_fault:
- {
- pgd_t *pgd, *pgd_k;
- pud_t *pud, *pud_k;
- p4d_t *p4d, *p4d_k;
- pmd_t *pmd, *pmd_k;
- pte_t *pte_k;
- int index;
-
- /* User mode accesses just cause a SIGSEGV */
- if (user_mode(regs))
- return do_trap(regs, SIGSEGV, code, addr);
-
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "tsk->active_mm->pgd" here.
- * We might be inside an interrupt in the middle
- * of a task switch.
- */
- index = pgd_index(addr);
- pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- goto no_context;
- set_pgd(pgd, *pgd_k);
-
- p4d = p4d_offset(pgd, addr);
- p4d_k = p4d_offset(pgd_k, addr);
- if (!p4d_present(*p4d_k))
- goto no_context;
-
- pud = pud_offset(p4d, addr);
- pud_k = pud_offset(p4d_k, addr);
- if (!pud_present(*pud_k))
- goto no_context;
-
- /*
- * Since the vmalloc area is global, it is unnecessary
- * to copy individual PTEs
- */
- pmd = pmd_offset(pud, addr);
- pmd_k = pmd_offset(pud_k, addr);
- if (!pmd_present(*pmd_k))
- goto no_context;
- set_pmd(pmd, *pmd_k);
-
- /*
- * Make sure the actual PTE exists as well to
- * catch kernel vmalloc-area accesses to non-mapped
- * addresses. If we don't do this, this will just
- * silently loop forever.
- */
- pte_k = pte_offset_kernel(pmd_k, addr);
- if (!pte_present(*pte_k))
- goto no_context;
-
- /*
- * The kernel assumes that TLBs don't cache invalid
- * entries, but in RISC-V, SFENCE.VMA specifies an
- * ordering constraint, not a cache flush; it is
- * necessary even after writing invalid entries.
- */
- local_flush_tlb_page(addr);
-
- return;
- }
}
+NOKPROBE_SYMBOL(do_page_fault);
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index f4adb3684f3d..d466ec670e1f 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -2,6 +2,8 @@
/*
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2020 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
*/
#include <linux/init.h>
@@ -9,10 +11,15 @@
#include <linux/memblock.h>
#include <linux/initrd.h>
#include <linux/swap.h>
+#include <linux/swiotlb.h>
#include <linux/sizes.h>
#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
#include <linux/libfdt.h>
#include <linux/set_memory.h>
+#include <linux/dma-map-ops.h>
+#include <linux/crash_dump.h>
+#include <linux/hugetlb.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
@@ -20,60 +27,124 @@
#include <asm/soc.h>
#include <asm/io.h>
#include <asm/ptdump.h>
+#include <asm/numa.h>
#include "../kernel/head.h"
+struct kernel_mapping kernel_map __ro_after_init;
+EXPORT_SYMBOL(kernel_map);
+#ifdef CONFIG_XIP_KERNEL
+#define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
+#endif
+
+#ifdef CONFIG_64BIT
+u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
+#else
+u64 satp_mode __ro_after_init = SATP_MODE_32;
+#endif
+EXPORT_SYMBOL(satp_mode);
+
+bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
+bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
+EXPORT_SYMBOL(pgtable_l4_enabled);
+EXPORT_SYMBOL(pgtable_l5_enabled);
+
+phys_addr_t phys_ram_base __ro_after_init;
+EXPORT_SYMBOL(phys_ram_base);
+
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
extern char _start[];
-void *dtb_early_va;
+#define DTB_EARLY_BASE_VA PGDIR_SIZE
+void *_dtb_early_va __initdata;
+uintptr_t _dtb_early_pa __initdata;
+
+static phys_addr_t dma32_phys_limit __initdata;
static void __init zone_sizes_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
#ifdef CONFIG_ZONE_DMA32
- max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G,
- (unsigned long) PFN_PHYS(max_low_pfn)));
+ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
free_area_init(max_zone_pfns);
}
-static void setup_zero_page(void)
-{
- memset((void *)empty_zero_page, 0, PAGE_SIZE);
-}
-
#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)
+
+#define LOG2_SZ_1K ilog2(SZ_1K)
+#define LOG2_SZ_1M ilog2(SZ_1M)
+#define LOG2_SZ_1G ilog2(SZ_1G)
+#define LOG2_SZ_1T ilog2(SZ_1T)
+
static inline void print_mlk(char *name, unsigned long b, unsigned long t)
{
pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld kB)\n", name, b, t,
- (((t) - (b)) >> 10));
+ (((t) - (b)) >> LOG2_SZ_1K));
}
static inline void print_mlm(char *name, unsigned long b, unsigned long t)
{
pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld MB)\n", name, b, t,
- (((t) - (b)) >> 20));
+ (((t) - (b)) >> LOG2_SZ_1M));
+}
+
+static inline void print_mlg(char *name, unsigned long b, unsigned long t)
+{
+ pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld GB)\n", name, b, t,
+ (((t) - (b)) >> LOG2_SZ_1G));
+}
+
+#ifdef CONFIG_64BIT
+static inline void print_mlt(char *name, unsigned long b, unsigned long t)
+{
+ pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld TB)\n", name, b, t,
+ (((t) - (b)) >> LOG2_SZ_1T));
+}
+#else
+#define print_mlt(n, b, t) do {} while (0)
+#endif
+
+static inline void print_ml(char *name, unsigned long b, unsigned long t)
+{
+ unsigned long diff = t - b;
+
+ if (IS_ENABLED(CONFIG_64BIT) && (diff >> LOG2_SZ_1T) >= 10)
+ print_mlt(name, b, t);
+ else if ((diff >> LOG2_SZ_1G) >= 10)
+ print_mlg(name, b, t);
+ else if ((diff >> LOG2_SZ_1M) >= 10)
+ print_mlm(name, b, t);
+ else
+ print_mlk(name, b, t);
}
-static void print_vm_layout(void)
+static void __init print_vm_layout(void)
{
pr_notice("Virtual kernel memory layout:\n");
- print_mlk("fixmap", (unsigned long)FIXADDR_START,
- (unsigned long)FIXADDR_TOP);
- print_mlm("pci io", (unsigned long)PCI_IO_START,
- (unsigned long)PCI_IO_END);
- print_mlm("vmemmap", (unsigned long)VMEMMAP_START,
- (unsigned long)VMEMMAP_END);
- print_mlm("vmalloc", (unsigned long)VMALLOC_START,
- (unsigned long)VMALLOC_END);
- print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
- (unsigned long)high_memory);
+ print_ml("fixmap", (unsigned long)FIXADDR_START,
+ (unsigned long)FIXADDR_TOP);
+ print_ml("pci io", (unsigned long)PCI_IO_START,
+ (unsigned long)PCI_IO_END);
+ print_ml("vmemmap", (unsigned long)VMEMMAP_START,
+ (unsigned long)VMEMMAP_END);
+ print_ml("vmalloc", (unsigned long)VMALLOC_START,
+ (unsigned long)VMALLOC_END);
+ print_ml("lowmem", (unsigned long)PAGE_OFFSET,
+ (unsigned long)high_memory);
+ if (IS_ENABLED(CONFIG_64BIT)) {
+#ifdef CONFIG_KASAN
+ print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
+#endif
+
+ print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR,
+ (unsigned long)ADDRESS_SPACE_END);
+ }
}
#else
static void print_vm_layout(void) { }
@@ -85,113 +156,137 @@ void __init mem_init(void)
BUG_ON(!mem_map);
#endif /* CONFIG_FLATMEM */
- high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
+ swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE);
memblock_free_all();
- mem_init_print_info(NULL);
print_vm_layout();
}
-#ifdef CONFIG_BLK_DEV_INITRD
-static void __init setup_initrd(void)
+/* Limit the memory size via mem. */
+static phys_addr_t memory_limit;
+
+static int __init early_mem(char *p)
{
- unsigned long size;
+ u64 size;
- if (initrd_start >= initrd_end) {
- pr_info("initrd not found or empty");
- goto disable;
- }
- if (__pa_symbol(initrd_end) > PFN_PHYS(max_low_pfn)) {
- pr_err("initrd extends beyond end of memory");
- goto disable;
- }
+ if (!p)
+ return 1;
- size = initrd_end - initrd_start;
- memblock_reserve(__pa_symbol(initrd_start), size);
- initrd_below_start_ok = 1;
+ size = memparse(p, &p) & PAGE_MASK;
+ memory_limit = min_t(u64, size, memory_limit);
- pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
- (void *)(initrd_start), size);
- return;
-disable:
- pr_cont(" - disabling initrd\n");
- initrd_start = 0;
- initrd_end = 0;
-}
-#endif /* CONFIG_BLK_DEV_INITRD */
+ pr_notice("Memory limited to %lldMB\n", (u64)memory_limit >> 20);
-static phys_addr_t dtb_early_pa __initdata;
+ return 0;
+}
+early_param("mem", early_mem);
-void __init setup_bootmem(void)
+static void __init setup_bootmem(void)
{
- struct memblock_region *reg;
- phys_addr_t mem_size = 0;
phys_addr_t vmlinux_end = __pa_symbol(&_end);
- phys_addr_t vmlinux_start = __pa_symbol(&_start);
-
- /* Find the memory region containing the kernel */
- for_each_memblock(memory, reg) {
- phys_addr_t end = reg->base + reg->size;
-
- if (reg->base <= vmlinux_start && vmlinux_end <= end) {
- mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET);
-
- /*
- * Remove memblock from the end of usable area to the
- * end of region
- */
- if (reg->base + mem_size < end)
- memblock_remove(reg->base + mem_size,
- end - reg->base - mem_size);
- }
- }
- BUG_ON(mem_size == 0);
+ phys_addr_t max_mapped_addr;
+ phys_addr_t phys_ram_end, vmlinux_start;
- /* Reserve from the start of the kernel to the end of the kernel */
+ if (IS_ENABLED(CONFIG_XIP_KERNEL))
+ vmlinux_start = __pa_symbol(&_sdata);
+ else
+ vmlinux_start = __pa_symbol(&_start);
+
+ memblock_enforce_memory_limit(memory_limit);
+
+ /*
+ * Make sure we align the reservation on PMD_SIZE since we will
+ * map the kernel in the linear mapping as read-only: we do not want
+ * any allocation to happen between _end and the next pmd aligned page.
+ */
+ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
+ /*
+ * Reserve from the start of the kernel to the end of the kernel
+ */
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
- set_max_mapnr(PFN_DOWN(mem_size));
- max_pfn = PFN_DOWN(memblock_end_of_DRAM());
- max_low_pfn = max_pfn;
+ phys_ram_end = memblock_end_of_DRAM();
+ if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+ phys_ram_base = memblock_start_of_DRAM();
+ /*
+ * memblock allocator is not aware of the fact that last 4K bytes of
+ * the addressable memory can not be mapped because of IS_ERR_VALUE
+ * macro. Make sure that last 4k bytes are not usable by memblock
+ * if end of dram is equal to maximum addressable memory. For 64-bit
+ * kernel, this problem can't happen here as the end of the virtual
+ * address space is occupied by the kernel mapping then this check must
+ * be done as soon as the kernel mapping base address is determined.
+ */
+ if (!IS_ENABLED(CONFIG_64BIT)) {
+ max_mapped_addr = __pa(~(ulong)0);
+ if (max_mapped_addr == (phys_ram_end - 1))
+ memblock_set_current_limit(max_mapped_addr - 4096);
+ }
+
+ min_low_pfn = PFN_UP(phys_ram_base);
+ max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
+ high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
-#ifdef CONFIG_BLK_DEV_INITRD
- setup_initrd();
-#endif /* CONFIG_BLK_DEV_INITRD */
+ dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
+ set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
+ reserve_initrd_mem();
/*
- * Avoid using early_init_fdt_reserve_self() since __pa() does
+ * If DTB is built in, no need to reserve its memblock.
+ * Otherwise, do reserve it but avoid using
+ * early_init_fdt_reserve_self() since __pa() does
* not work for DTB pointers that are fixmap addresses
*/
- memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+ if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) {
+ /*
+ * In case the DTB is not located in a memory region we won't
+ * be able to locate it later on via the linear mapping and
+ * get a segfault when accessing it via __va(dtb_early_pa).
+ * To avoid this situation copy DTB to a memory region.
+ * Note that memblock_phys_alloc will also reserve DTB region.
+ */
+ if (!memblock_is_memory(dtb_early_pa)) {
+ size_t fdt_size = fdt_totalsize(dtb_early_va);
+ phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE);
+ void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size);
+
+ memcpy(new_dtb_early_va, dtb_early_va, fdt_size);
+ early_memunmap(new_dtb_early_va, fdt_size);
+ _dtb_early_pa = new_dtb_early_pa;
+ } else
+ memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+ }
early_init_fdt_scan_reserved_mem();
+ dma_contiguous_reserve(dma32_phys_limit);
+ if (IS_ENABLED(CONFIG_64BIT))
+ hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
memblock_allow_resize();
- memblock_dump_all();
-
- for_each_memblock(memory, reg) {
- unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
- unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
-
- memblock_set_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn),
- &memblock.memory, 0);
- }
}
#ifdef CONFIG_MMU
-unsigned long va_pa_offset;
-EXPORT_SYMBOL(va_pa_offset);
-unsigned long pfn_base;
-EXPORT_SYMBOL(pfn_base);
+struct pt_alloc_ops pt_ops __initdata;
+
+unsigned long riscv_pfn_base __ro_after_init;
+EXPORT_SYMBOL(riscv_pfn_base);
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
-pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
-static bool mmu_enabled;
-
-#define MAX_EARLY_MAPPING_SIZE SZ_128M
+static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
+static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
+#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base))
+#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
+#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
+#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
+#endif /* CONFIG_XIP_KERNEL */
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
@@ -202,35 +297,53 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
ptep = &fixmap_pte[pte_index(addr)];
- if (pgprot_val(prot)) {
+ if (pgprot_val(prot))
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
- } else {
+ else
pte_clear(&init_mm, addr, ptep);
- local_flush_tlb_page(addr);
- }
+ local_flush_tlb_page(addr);
}
-static pte_t *__init get_pte_virt(phys_addr_t pa)
+static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
{
- if (mmu_enabled) {
- clear_fixmap(FIX_PTE);
- return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
- } else {
- return (pte_t *)((uintptr_t)pa);
- }
+ return (pte_t *)((uintptr_t)pa);
+}
+
+static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PTE);
+ return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
}
-static phys_addr_t __init alloc_pte(uintptr_t va)
+static inline pte_t *__init get_pte_virt_late(phys_addr_t pa)
+{
+ return (pte_t *) __va(pa);
+}
+
+static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
{
/*
* We only create PMD or PGD early mappings so we
* should never reach here with MMU disabled.
*/
- BUG_ON(!mmu_enabled);
+ BUG();
+}
+static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
+{
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
+static phys_addr_t __init alloc_pte_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)));
+
+ return __pa(vaddr);
+}
+
static void __init create_pte_mapping(pte_t *ptep,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
@@ -245,36 +358,73 @@ static void __init create_pte_mapping(pte_t *ptep,
#ifndef __PAGETABLE_PMD_FOLDED
-pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
-pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
+#define fixmap_pmd ((pmd_t *)XIP_FIXUP(fixmap_pmd))
+#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
+#endif /* CONFIG_XIP_KERNEL */
+
+static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
+#define fixmap_p4d ((p4d_t *)XIP_FIXUP(fixmap_p4d))
+#define early_p4d ((p4d_t *)XIP_FIXUP(early_p4d))
+#endif /* CONFIG_XIP_KERNEL */
+
+static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
+#define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud))
+#define early_pud ((pud_t *)XIP_FIXUP(early_pud))
+#endif /* CONFIG_XIP_KERNEL */
+
+static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
+{
+ /* Before MMU is enabled */
+ return (pmd_t *)((uintptr_t)pa);
+}
-#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE
-#define NUM_EARLY_PMDS 1UL
-#else
-#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE)
-#endif
-pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
+static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PMD);
+ return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
+}
-static pmd_t *__init get_pmd_virt(phys_addr_t pa)
+static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
{
- if (mmu_enabled) {
- clear_fixmap(FIX_PMD);
- return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
- } else {
- return (pmd_t *)((uintptr_t)pa);
- }
+ return (pmd_t *) __va(pa);
}
-static phys_addr_t __init alloc_pmd(uintptr_t va)
+static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
- uintptr_t pmd_num;
+ BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
- if (mmu_enabled)
- return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+ return (uintptr_t)early_pmd;
+}
+
+static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
- pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
- BUG_ON(pmd_num >= NUM_EARLY_PMDS);
- return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
+static phys_addr_t __init alloc_pmd_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page(vaddr)));
+
+ return __pa(vaddr);
}
static void __init create_pmd_mapping(pmd_t *pmdp,
@@ -292,34 +442,184 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
}
if (pmd_none(pmdp[pmd_idx])) {
- pte_phys = alloc_pte(va);
+ pte_phys = pt_ops.alloc_pte(va);
pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
- ptep = get_pte_virt(pte_phys);
+ ptep = pt_ops.get_pte_virt(pte_phys);
memset(ptep, 0, PAGE_SIZE);
} else {
pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
- ptep = get_pte_virt(pte_phys);
+ ptep = pt_ops.get_pte_virt(pte_phys);
}
create_pte_mapping(ptep, va, pa, sz, prot);
}
-#define pgd_next_t pmd_t
-#define alloc_pgd_next(__va) alloc_pmd(__va)
-#define get_pgd_next_virt(__pa) get_pmd_virt(__pa)
+static pud_t *__init get_pud_virt_early(phys_addr_t pa)
+{
+ return (pud_t *)((uintptr_t)pa);
+}
+
+static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PUD);
+ return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
+}
+
+static pud_t *__init get_pud_virt_late(phys_addr_t pa)
+{
+ return (pud_t *)__va(pa);
+}
+
+static phys_addr_t __init alloc_pud_early(uintptr_t va)
+{
+ /* Only one PUD is available for early mapping */
+ BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_pud;
+}
+
+static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_pud_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+}
+
+static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
+{
+ return (p4d_t *)((uintptr_t)pa);
+}
+
+static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_P4D);
+ return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
+}
+
+static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
+{
+ return (p4d_t *)__va(pa);
+}
+
+static phys_addr_t __init alloc_p4d_early(uintptr_t va)
+{
+ /* Only one P4D is available for early mapping */
+ BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_p4d;
+}
+
+static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_p4d_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+}
+
+static void __init create_pud_mapping(pud_t *pudp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pmd_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t pud_index = pud_index(va);
+
+ if (sz == PUD_SIZE) {
+ if (pud_val(pudp[pud_index]) == 0)
+ pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (pud_val(pudp[pud_index]) == 0) {
+ next_phys = pt_ops.alloc_pmd(va);
+ pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = pt_ops.get_pmd_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
+ nextp = pt_ops.get_pmd_virt(next_phys);
+ }
+
+ create_pmd_mapping(nextp, va, pa, sz, prot);
+}
+
+static void __init create_p4d_mapping(p4d_t *p4dp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pud_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t p4d_index = p4d_index(va);
+
+ if (sz == P4D_SIZE) {
+ if (p4d_val(p4dp[p4d_index]) == 0)
+ p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (p4d_val(p4dp[p4d_index]) == 0) {
+ next_phys = pt_ops.alloc_pud(va);
+ p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = pt_ops.get_pud_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
+ nextp = pt_ops.get_pud_virt(next_phys);
+ }
+
+ create_pud_mapping(nextp, va, pa, sz, prot);
+}
+
+#define pgd_next_t p4d_t
+#define alloc_pgd_next(__va) (pgtable_l5_enabled ? \
+ pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ? \
+ pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
+#define get_pgd_next_virt(__pa) (pgtable_l5_enabled ? \
+ pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ? \
+ pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
- create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pmd
+ (pgtable_l5_enabled ? \
+ create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
+ (pgtable_l4_enabled ? \
+ create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) : \
+ create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
+#define fixmap_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)fixmap_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
+#define trampoline_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
+#define early_dtb_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
#else
#define pgd_next_t pte_t
-#define alloc_pgd_next(__va) alloc_pte(__va)
-#define get_pgd_next_virt(__pa) get_pte_virt(__pa)
+#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
+#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pte
-#endif
-
-static void __init create_pgd_mapping(pgd_t *pgdp,
+#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
+#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
+#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
+#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
+#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
+#endif /* __PAGETABLE_PMD_FOLDED */
+
+void __init create_pgd_mapping(pgd_t *pgdp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
@@ -355,6 +655,129 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
return PMD_SIZE;
}
+#ifdef CONFIG_XIP_KERNEL
+#define phys_ram_base (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
+extern char _xiprom[], _exiprom[], __data_loc;
+
+/* called from head.S with MMU off */
+asmlinkage void __init __copy_data(void)
+{
+ void *from = (void *)(&__data_loc);
+ void *to = (void *)CONFIG_PHYS_RAM_BASE;
+ size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));
+
+ memcpy(to, from, sz);
+}
+#endif
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+static __init pgprot_t pgprot_from_va(uintptr_t va)
+{
+ if (is_va_kernel_text(va))
+ return PAGE_KERNEL_READ_EXEC;
+
+ /*
+ * In 64-bit kernel, the kernel mapping is outside the linear mapping so
+ * we must protect its linear mapping alias from being executed and
+ * written.
+ * And rodata section is marked readonly in mark_rodata_ro.
+ */
+ if (IS_ENABLED(CONFIG_64BIT) && is_va_kernel_lm_alias_text(va))
+ return PAGE_KERNEL_READ;
+
+ return PAGE_KERNEL;
+}
+
+void mark_rodata_ro(void)
+{
+ set_kernel_memory(__start_rodata, _data, set_memory_ro);
+ if (IS_ENABLED(CONFIG_64BIT))
+ set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
+ set_memory_ro);
+
+ debug_checkwx();
+}
+#else
+static __init pgprot_t pgprot_from_va(uintptr_t va)
+{
+ if (IS_ENABLED(CONFIG_64BIT) && !is_kernel_mapping(va))
+ return PAGE_KERNEL;
+
+ return PAGE_KERNEL_EXEC;
+}
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+
+#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+static void __init disable_pgtable_l5(void)
+{
+ pgtable_l5_enabled = false;
+ kernel_map.page_offset = PAGE_OFFSET_L4;
+ satp_mode = SATP_MODE_48;
+}
+
+static void __init disable_pgtable_l4(void)
+{
+ pgtable_l4_enabled = false;
+ kernel_map.page_offset = PAGE_OFFSET_L3;
+ satp_mode = SATP_MODE_39;
+}
+
+/*
+ * There is a simple way to determine if 4-level is supported by the
+ * underlying hardware: establish 1:1 mapping in 4-level page table mode
+ * then read SATP to see if the configuration was taken into account
+ * meaning sv48 is supported.
+ */
+static __init void set_satp_mode(void)
+{
+ u64 identity_satp, hw_satp;
+ uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
+ bool check_l4 = false;
+
+ create_p4d_mapping(early_p4d,
+ set_satp_mode_pmd, (uintptr_t)early_pud,
+ P4D_SIZE, PAGE_TABLE);
+ create_pud_mapping(early_pud,
+ set_satp_mode_pmd, (uintptr_t)early_pmd,
+ PUD_SIZE, PAGE_TABLE);
+ /* Handle the case where set_satp_mode straddles 2 PMDs */
+ create_pmd_mapping(early_pmd,
+ set_satp_mode_pmd, set_satp_mode_pmd,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+ create_pmd_mapping(early_pmd,
+ set_satp_mode_pmd + PMD_SIZE,
+ set_satp_mode_pmd + PMD_SIZE,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+retry:
+ create_pgd_mapping(early_pg_dir,
+ set_satp_mode_pmd,
+ check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
+ PGDIR_SIZE, PAGE_TABLE);
+
+ identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
+
+ local_flush_tlb_all();
+ csr_write(CSR_SATP, identity_satp);
+ hw_satp = csr_swap(CSR_SATP, 0ULL);
+ local_flush_tlb_all();
+
+ if (hw_satp != identity_satp) {
+ if (!check_l4) {
+ disable_pgtable_l5();
+ check_l4 = true;
+ memset(early_pg_dir, 0, PAGE_SIZE);
+ goto retry;
+ }
+ disable_pgtable_l4();
+ }
+
+ memset(early_pg_dir, 0, PAGE_SIZE);
+ memset(early_p4d, 0, PAGE_SIZE);
+ memset(early_pud, 0, PAGE_SIZE);
+ memset(early_pmd, 0, PAGE_SIZE);
+}
+#endif
+
/*
* setup_vm() is called from head.S with MMU-off.
*
@@ -373,125 +796,331 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif
-asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+#ifdef CONFIG_XIP_KERNEL
+static void __init create_kernel_page_table(pgd_t *pgdir,
+ __always_unused bool early)
+{
+ uintptr_t va, end_va;
+
+ /* Map the flash resident part */
+ end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
+ for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
+ create_pgd_mapping(pgdir, va,
+ kernel_map.xiprom + (va - kernel_map.virt_addr),
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+
+ /* Map the data in RAM */
+ end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
+ for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
+ create_pgd_mapping(pgdir, va,
+ kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
+ PMD_SIZE, PAGE_KERNEL);
+}
+#else
+static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
{
uintptr_t va, end_va;
- uintptr_t load_pa = (uintptr_t)(&_start);
- uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
- uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
- va_pa_offset = PAGE_OFFSET - load_pa;
- pfn_base = PFN_DOWN(load_pa);
+ end_va = kernel_map.virt_addr + kernel_map.size;
+ for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
+ create_pgd_mapping(pgdir, va,
+ kernel_map.phys_addr + (va - kernel_map.virt_addr),
+ PMD_SIZE,
+ early ?
+ PAGE_KERNEL_EXEC : pgprot_from_va(va));
+}
+#endif
+
+/*
+ * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel,
+ * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
+ * entry.
+ */
+static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
+{
+#ifndef CONFIG_BUILTIN_DTB
+ uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
+
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+ IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
+ PGDIR_SIZE,
+ IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
+
+ if (pgtable_l5_enabled)
+ create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
+
+ if (pgtable_l4_enabled)
+ create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
+
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
+ pa, PMD_SIZE, PAGE_KERNEL);
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
+ pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
+ }
+ dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else
/*
- * Enforce boot alignment requirements of RV32 and
- * RV64 by only allowing PMD or PGD mappings.
+ * For 64-bit kernel, __va can't be used since it would return a linear
+ * mapping address whereas dtb_early_va will be used before
+ * setup_vm_final installs the linear mapping. For 32-bit kernel, as the
+ * kernel is mapped in the linear mapping, that makes no difference.
*/
- BUG_ON(map_size == PAGE_SIZE);
+ dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
+#endif
+
+ dtb_early_pa = dtb_pa;
+}
+
+/*
+ * MMU is not enabled, the page tables are allocated directly using
+ * early_pmd/pud/p4d and the address returned is the physical one.
+ */
+static void __init pt_ops_set_early(void)
+{
+ pt_ops.alloc_pte = alloc_pte_early;
+ pt_ops.get_pte_virt = get_pte_virt_early;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_early;
+ pt_ops.get_pmd_virt = get_pmd_virt_early;
+ pt_ops.alloc_pud = alloc_pud_early;
+ pt_ops.get_pud_virt = get_pud_virt_early;
+ pt_ops.alloc_p4d = alloc_p4d_early;
+ pt_ops.get_p4d_virt = get_p4d_virt_early;
+#endif
+}
+
+/*
+ * MMU is enabled but page table setup is not complete yet.
+ * fixmap page table alloc functions must be used as a means to temporarily
+ * map the allocated physical pages since the linear mapping does not exist yet.
+ *
+ * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
+ * but it will be used as described above.
+ */
+static void __init pt_ops_set_fixmap(void)
+{
+ pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
+ pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
+ pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
+ pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
+ pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
+ pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap);
+ pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap);
+#endif
+}
+
+/*
+ * MMU is enabled and page table setup is complete, so from now, we can use
+ * generic page allocation functions to setup page table.
+ */
+static void __init pt_ops_set_late(void)
+{
+ pt_ops.alloc_pte = alloc_pte_late;
+ pt_ops.get_pte_virt = get_pte_virt_late;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_late;
+ pt_ops.get_pmd_virt = get_pmd_virt_late;
+ pt_ops.alloc_pud = alloc_pud_late;
+ pt_ops.get_pud_virt = get_pud_virt_late;
+ pt_ops.alloc_p4d = alloc_p4d_late;
+ pt_ops.get_p4d_virt = get_p4d_virt_late;
+#endif
+}
+
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+ pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
+
+ kernel_map.virt_addr = KERNEL_LINK_ADDR;
+ kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
+
+#ifdef CONFIG_XIP_KERNEL
+ kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
+ kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
+
+ phys_ram_base = CONFIG_PHYS_RAM_BASE;
+ kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
+ kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata);
+
+ kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
+#else
+ kernel_map.phys_addr = (uintptr_t)(&_start);
+ kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
+#endif
+
+#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+ set_satp_mode();
+#endif
+
+ kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
+ kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
+
+ riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
+
+ /*
+ * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
+ * kernel, whereas for 64-bit kernel, the end of the virtual address
+ * space is occupied by the modules/BPF/kernel mappings which reduces
+ * the available size of the linear mapping.
+ */
+ memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);
/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
- BUG_ON((load_pa % map_size) != 0);
- BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
+ BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
+
+#ifdef CONFIG_64BIT
+ /*
+ * The last 4K bytes of the addressable memory can not be mapped because
+ * of IS_ERR_VALUE macro.
+ */
+ BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
+#endif
+
+ apply_early_boot_alternatives();
+ pt_ops_set_early();
/* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START,
- (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
- /* Setup fixmap PMD */
+ /* Setup fixmap P4D and PUD */
+ if (pgtable_l5_enabled)
+ create_p4d_mapping(fixmap_p4d, FIXADDR_START,
+ (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
+ /* Setup fixmap PUD and PMD */
+ if (pgtable_l4_enabled)
+ create_pud_mapping(fixmap_pud, FIXADDR_START,
+ (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
/* Setup trampoline PGD and PMD */
- create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
- (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
- create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
- load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
+ create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
+ trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ if (pgtable_l5_enabled)
+ create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
+ (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
+ if (pgtable_l4_enabled)
+ create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
+ (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
+#ifdef CONFIG_XIP_KERNEL
+ create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
+ kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
+#else
+ create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
+ kernel_map.phys_addr, PMD_SIZE, PAGE_KERNEL_EXEC);
+#endif
#else
/* Setup trampoline PGD */
- create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
- load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
+ create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
+ kernel_map.phys_addr, PGDIR_SIZE, PAGE_KERNEL_EXEC);
#endif
/*
- * Setup early PGD covering entire kernel which will allows
+ * Setup early PGD covering entire kernel which will allow
* us to reach paging_init(). We map all memory banks later
* in setup_vm_final() below.
*/
- end_va = PAGE_OFFSET + load_sz;
- for (va = PAGE_OFFSET; va < end_va; va += map_size)
- create_pgd_mapping(early_pg_dir, va,
- load_pa + (va - PAGE_OFFSET),
- map_size, PAGE_KERNEL_EXEC);
-
- /* Create fixed mapping for early FDT parsing */
- end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
- for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
- create_pte_mapping(fixmap_pte, va,
- dtb_pa + (va - __fix_to_virt(FIX_FDT)),
- PAGE_SIZE, PAGE_KERNEL);
-
- /* Save pointer to DTB for early FDT parsing */
- dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
- /* Save physical address for memblock reservation */
- dtb_early_pa = dtb_pa;
+ create_kernel_page_table(early_pg_dir, true);
+
+ /* Setup early mapping for FDT early scan */
+ create_fdt_early_page_table(early_pg_dir, dtb_pa);
+
+ /*
+ * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
+ * range can not span multiple pmds.
+ */
+ BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ /*
+ * Early ioremap fixmap is already created as it lies within first 2MB
+ * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
+ * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn
+ * the user if not.
+ */
+ fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
+ fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
+ if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
+ WARN_ON(1);
+ pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
+ pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
+ pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
+ }
+#endif
+
+ pt_ops_set_fixmap();
}
static void __init setup_vm_final(void)
{
uintptr_t va, map_size;
phys_addr_t pa, start, end;
- struct memblock_region *reg;
-
- /* Set mmu_enabled flag */
- mmu_enabled = true;
+ u64 i;
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
- /* Map all memory banks */
- for_each_memblock(memory, reg) {
- start = reg->base;
- end = start + reg->size;
-
+ /* Map all memory banks in the linear mapping */
+ for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
- if (memblock_is_nomap(reg))
- continue;
if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET);
+ if (end >= __pa(PAGE_OFFSET) + memory_limit)
+ end = __pa(PAGE_OFFSET) + memory_limit;
map_size = best_map_size(start, end - start);
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
- create_pgd_mapping(swapper_pg_dir, va, pa,
- map_size, PAGE_KERNEL_EXEC);
+
+ create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
+ pgprot_from_va(va));
}
}
+ /* Map the kernel */
+ if (IS_ENABLED(CONFIG_64BIT))
+ create_kernel_page_table(swapper_pg_dir, false);
+
+#ifdef CONFIG_KASAN
+ kasan_swapper_init();
+#endif
+
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
+ clear_fixmap(FIX_PUD);
+ clear_fixmap(FIX_P4D);
/* Move to swapper page table */
- csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
+ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
local_flush_tlb_all();
+
+ pt_ops_set_late();
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
-#ifdef CONFIG_BUILTIN_DTB
- dtb_early_va = soc_lookup_builtin_dtb();
- if (!dtb_early_va) {
- /* Fallback to first available DTS */
- dtb_early_va = (void *) __dtb_start;
- }
-#else
dtb_early_va = (void *)dtb_pa;
-#endif
+ dtb_early_pa = dtb_pa;
}
static inline void setup_vm_final(void)
@@ -499,37 +1128,94 @@ static inline void setup_vm_final(void)
}
#endif /* CONFIG_MMU */
-#ifdef CONFIG_STRICT_KERNEL_RWX
-void mark_rodata_ro(void)
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * This function reserves memory area given in "crashkernel=" kernel command
+ * line parameter. The memory reserved is used by dump capture kernel when
+ * primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
{
- unsigned long text_start = (unsigned long)_text;
- unsigned long text_end = (unsigned long)_etext;
- unsigned long rodata_start = (unsigned long)__start_rodata;
- unsigned long data_start = (unsigned long)_data;
- unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
+ unsigned long long crash_base = 0;
+ unsigned long long crash_size = 0;
+ unsigned long search_start = memblock_start_of_DRAM();
+ unsigned long search_end = memblock_end_of_DRAM();
- set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
- set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
- set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
- set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
+ int ret = 0;
- debug_checkwx();
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+ /*
+ * Don't reserve a region for a crash kernel on a crash kernel
+ * since it doesn't make much sense and we have limited memory
+ * resources.
+ */
+ if (is_kdump_kernel()) {
+ pr_info("crashkernel: ignoring reservation request\n");
+ return;
+ }
+
+ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ &crash_size, &crash_base);
+ if (ret || !crash_size)
+ return;
+
+ crash_size = PAGE_ALIGN(crash_size);
+
+ if (crash_base) {
+ search_start = crash_base;
+ search_end = crash_base + crash_size;
+ }
+
+ /*
+ * Current riscv boot protocol requires 2MB alignment for
+ * RV64 and 4MB alignment for RV32 (hugepage size)
+ *
+ * Try to alloc from 32bit addressible physical memory so that
+ * swiotlb can work on the crash kernel.
+ */
+ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
+ search_start,
+ min(search_end, (unsigned long) SZ_4G));
+ if (crash_base == 0) {
+ /* Try again without restricting region to 32bit addressible memory */
+ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
+ search_start, search_end);
+ if (crash_base == 0) {
+ pr_warn("crashkernel: couldn't allocate %lldKB\n",
+ crash_size >> 10);
+ return;
+ }
+ }
+
+ pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
+ crash_base, crash_base + crash_size, crash_size >> 20);
+
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
}
-#endif
void __init paging_init(void)
{
+ setup_bootmem();
setup_vm_final();
- memblocks_present();
+}
+
+void __init misc_mem_init(void)
+{
+ early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
+ arch_numa_init();
sparse_init();
- setup_zero_page();
zone_sizes_init();
+ reserve_crashkernel();
+ memblock_dump_all();
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- return vmemmap_populate_basepages(start, end, node);
+ return vmemmap_populate_basepages(start, end, node, NULL);
}
#endif
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 4a8b61806633..a22e418dbd82 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -9,98 +9,454 @@
#include <linux/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
+#include <asm/pgalloc.h>
+
+/*
+ * Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57
+ * which is right before the kernel.
+ *
+ * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate
+ * the page global directory with kasan_early_shadow_pmd.
+ *
+ * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping
+ * must be divided as follows:
+ * - the first PGD entry, although incomplete, is populated with
+ * kasan_early_shadow_pud/p4d
+ * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d
+ * - the last PGD entry is shared with the kernel mapping so populated at the
+ * lower levels pud/p4d
+ *
+ * In addition, when shallow populating a kasan region (for example vmalloc),
+ * this region may also not be aligned on PGDIR size, so we must go down to the
+ * pud level too.
+ */
extern pgd_t early_pg_dir[PTRS_PER_PGD];
+
+static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
+{
+ phys_addr_t phys_addr;
+ pte_t *ptep, *base_pte;
+
+ if (pmd_none(*pmd))
+ base_pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
+ else
+ base_pte = (pte_t *)pmd_page_vaddr(*pmd);
+
+ ptep = base_pte + pte_index(vaddr);
+
+ do {
+ if (pte_none(*ptep)) {
+ phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ }
+ } while (ptep++, vaddr += PAGE_SIZE, vaddr != end);
+
+ set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
+}
+
+static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end)
+{
+ phys_addr_t phys_addr;
+ pmd_t *pmdp, *base_pmd;
+ unsigned long next;
+
+ if (pud_none(*pud)) {
+ base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ } else {
+ base_pmd = (pmd_t *)pud_pgtable(*pud);
+ if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+ base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ }
+
+ pmdp = base_pmd + pmd_index(vaddr);
+
+ do {
+ next = pmd_addr_end(vaddr, end);
+
+ if (pmd_none(*pmdp) && IS_ALIGNED(vaddr, PMD_SIZE) && (next - vaddr) >= PMD_SIZE) {
+ phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE);
+ if (phys_addr) {
+ set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+
+ kasan_populate_pte(pmdp, vaddr, next);
+ } while (pmdp++, vaddr = next, vaddr != end);
+
+ /*
+ * Wait for the whole PGD to be populated before setting the PGD in
+ * the page table, otherwise, if we did set the PGD before populating
+ * it entirely, memblock could allocate a page at a physical address
+ * where KASAN is not populated yet and then we'd get a page fault.
+ */
+ set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+}
+
+static void __init kasan_populate_pud(pgd_t *pgd,
+ unsigned long vaddr, unsigned long end,
+ bool early)
+{
+ phys_addr_t phys_addr;
+ pud_t *pudp, *base_pud;
+ unsigned long next;
+
+ if (early) {
+ /*
+ * We can't use pgd_page_vaddr here as it would return a linear
+ * mapping address but it is not mapped yet, but when populating
+ * early_pg_dir, we need the physical address and when populating
+ * swapper_pg_dir, we need the kernel virtual address so use
+ * pt_ops facility.
+ */
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+ } else if (pgd_none(*pgd)) {
+ base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
+ } else {
+ base_pud = (pud_t *)pgd_page_vaddr(*pgd);
+ if (base_pud == lm_alias(kasan_early_shadow_pud)) {
+ base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
+ memcpy(base_pud, (void *)kasan_early_shadow_pud,
+ sizeof(pud_t) * PTRS_PER_PUD);
+ }
+ }
+
+ pudp = base_pud + pud_index(vaddr);
+
+ do {
+ next = pud_addr_end(vaddr, end);
+
+ if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
+ if (early) {
+ phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd));
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ } else {
+ phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
+ if (phys_addr) {
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+ }
+
+ kasan_populate_pmd(pudp, vaddr, next);
+ } while (pudp++, vaddr = next, vaddr != end);
+
+ /*
+ * Wait for the whole PGD to be populated before setting the PGD in
+ * the page table, otherwise, if we did set the PGD before populating
+ * it entirely, memblock could allocate a page at a physical address
+ * where KASAN is not populated yet and then we'd get a page fault.
+ */
+ if (!early)
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
+}
+
+static void __init kasan_populate_p4d(pgd_t *pgd,
+ unsigned long vaddr, unsigned long end,
+ bool early)
+{
+ phys_addr_t phys_addr;
+ p4d_t *p4dp, *base_p4d;
+ unsigned long next;
+
+ if (early) {
+ /*
+ * We can't use pgd_page_vaddr here as it would return a linear
+ * mapping address but it is not mapped yet, but when populating
+ * early_pg_dir, we need the physical address and when populating
+ * swapper_pg_dir, we need the kernel virtual address so use
+ * pt_ops facility.
+ */
+ base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+ } else {
+ base_p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+ if (base_p4d == lm_alias(kasan_early_shadow_p4d))
+ base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE);
+ }
+
+ p4dp = base_p4d + p4d_index(vaddr);
+
+ do {
+ next = p4d_addr_end(vaddr, end);
+
+ if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) {
+ if (early) {
+ phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud));
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ } else {
+ phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE);
+ if (phys_addr) {
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+ }
+
+ kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early);
+ } while (p4dp++, vaddr = next, vaddr != end);
+
+ /*
+ * Wait for the whole P4D to be populated before setting the P4D in
+ * the page table, otherwise, if we did set the P4D before populating
+ * it entirely, memblock could allocate a page at a physical address
+ * where KASAN is not populated yet and then we'd get a page fault.
+ */
+ if (!early)
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE));
+}
+
+#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)kasan_early_shadow_p4d : \
+ (pgtable_l4_enabled ? \
+ (uintptr_t)kasan_early_shadow_pud : \
+ (uintptr_t)kasan_early_shadow_pmd))
+#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \
+ (pgtable_l5_enabled ? \
+ kasan_populate_p4d(pgdp, vaddr, next, early) : \
+ (pgtable_l4_enabled ? \
+ kasan_populate_pud(pgdp, vaddr, next, early) : \
+ kasan_populate_pmd((pud_t *)pgdp, vaddr, next)))
+
+static void __init kasan_populate_pgd(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end,
+ bool early)
+{
+ phys_addr_t phys_addr;
+ unsigned long next;
+
+ do {
+ next = pgd_addr_end(vaddr, end);
+
+ if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
+ if (early) {
+ phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next);
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ } else if (pgd_page_vaddr(*pgdp) ==
+ (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) {
+ /*
+ * pgdp can't be none since kasan_early_init
+ * initialized all KASAN shadow region with
+ * kasan_early_shadow_pud: if this is still the
+ * case, that means we can try to allocate a
+ * hugepage as a replacement.
+ */
+ phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
+ if (phys_addr) {
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+ }
+
+ kasan_populate_pgd_next(pgdp, vaddr, next, early);
+ } while (pgdp++, vaddr = next, vaddr != end);
+}
+
asmlinkage void __init kasan_early_init(void)
{
uintptr_t i;
- pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
+
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+ KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
for (i = 0; i < PTRS_PER_PTE; ++i)
set_pte(kasan_early_shadow_pte + i,
- mk_pte(virt_to_page(kasan_early_shadow_page),
- PAGE_KERNEL));
+ pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL));
for (i = 0; i < PTRS_PER_PMD; ++i)
set_pmd(kasan_early_shadow_pmd + i,
pfn_pmd(PFN_DOWN
- (__pa((uintptr_t) kasan_early_shadow_pte)),
- __pgprot(_PAGE_TABLE)));
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
-
- /* init for swapper_pg_dir */
- pgd = pgd_offset_k(KASAN_SHADOW_START);
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
-
- flush_tlb_all();
+ (__pa((uintptr_t)kasan_early_shadow_pte)),
+ PAGE_TABLE));
+
+ if (pgtable_l4_enabled) {
+ for (i = 0; i < PTRS_PER_PUD; ++i)
+ set_pud(kasan_early_shadow_pud + i,
+ pfn_pud(PFN_DOWN
+ (__pa(((uintptr_t)kasan_early_shadow_pmd))),
+ PAGE_TABLE));
+ }
+
+ if (pgtable_l5_enabled) {
+ for (i = 0; i < PTRS_PER_P4D; ++i)
+ set_p4d(kasan_early_shadow_p4d + i,
+ pfn_p4d(PFN_DOWN
+ (__pa(((uintptr_t)kasan_early_shadow_pud))),
+ PAGE_TABLE));
+ }
+
+ kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+ local_flush_tlb_all();
}
-static void __init populate(void *start, void *end)
+void __init kasan_swapper_init(void)
+{
+ kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+ local_flush_tlb_all();
+}
+
+static void __init kasan_populate(void *start, void *end)
{
- unsigned long i, offset;
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
- unsigned long n_pages = (vend - vaddr) / PAGE_SIZE;
- unsigned long n_ptes =
- ((n_pages + PTRS_PER_PTE) & -PTRS_PER_PTE) / PTRS_PER_PTE;
- unsigned long n_pmds =
- ((n_ptes + PTRS_PER_PMD) & -PTRS_PER_PMD) / PTRS_PER_PMD;
-
- pte_t *pte =
- memblock_alloc(n_ptes * PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
- pmd_t *pmd =
- memblock_alloc(n_pmds * PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
- pgd_t *pgd = pgd_offset_k(vaddr);
-
- for (i = 0; i < n_pages; i++) {
- phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
- set_pte(&pte[i], pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
- }
- for (i = 0, offset = 0; i < n_ptes; i++, offset += PTRS_PER_PTE)
- set_pmd(&pmd[i],
- pfn_pmd(PFN_DOWN(__pa(&pte[offset])),
- __pgprot(_PAGE_TABLE)));
+ kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false);
- for (i = 0, offset = 0; i < n_pmds; i++, offset += PTRS_PER_PMD)
- set_pgd(&pgd[i],
- pfn_pgd(PFN_DOWN(__pa(&pmd[offset])),
- __pgprot(_PAGE_TABLE)));
+ local_flush_tlb_all();
+ memset(start, KASAN_SHADOW_INIT, end - start);
+}
+
+static void __init kasan_shallow_populate_pmd(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+ pmd_t *pmdp, *base_pmd;
+ bool is_kasan_pte;
- flush_tlb_all();
- memset(start, 0, end - start);
+ base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp);
+ pmdp = base_pmd + pmd_index(vaddr);
+
+ do {
+ next = pmd_addr_end(vaddr, end);
+ is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte));
+
+ if (is_kasan_pte)
+ pmd_clear(pmdp);
+ } while (pmdp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+ pud_t *pudp, *base_pud;
+ pmd_t *base_pmd;
+ bool is_kasan_pmd;
+
+ base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
+ pudp = base_pud + pud_index(vaddr);
+
+ do {
+ next = pud_addr_end(vaddr, end);
+ is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
+
+ if (!is_kasan_pmd)
+ continue;
+
+ base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+
+ if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE)
+ continue;
+
+ memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE);
+ kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next);
+ } while (pudp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_shallow_populate_p4d(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+ p4d_t *p4dp, *base_p4d;
+ pud_t *base_pud;
+ bool is_kasan_pud;
+
+ base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp);
+ p4dp = base_p4d + p4d_index(vaddr);
+
+ do {
+ next = p4d_addr_end(vaddr, end);
+ is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud));
+
+ if (!is_kasan_pud)
+ continue;
+
+ base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
+
+ if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE)
+ continue;
+
+ memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE);
+ kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next);
+ } while (p4dp++, vaddr = next, vaddr != end);
+}
+
+#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next) \
+ (pgtable_l5_enabled ? \
+ kasan_shallow_populate_p4d(pgdp, vaddr, next) : \
+ (pgtable_l4_enabled ? \
+ kasan_shallow_populate_pud(pgdp, vaddr, next) : \
+ kasan_shallow_populate_pmd(pgdp, vaddr, next)))
+
+static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+ void *p;
+ pgd_t *pgd_k = pgd_offset_k(vaddr);
+ bool is_kasan_pgd_next;
+
+ do {
+ next = pgd_addr_end(vaddr, end);
+ is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) ==
+ (unsigned long)lm_alias(kasan_early_shadow_pgd_next));
+
+ if (is_kasan_pgd_next) {
+ p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
+ }
+
+ if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
+ continue;
+
+ memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE);
+ kasan_shallow_populate_pgd_next(pgd_k, vaddr, next);
+ } while (pgd_k++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_shallow_populate(void *start, void *end)
+{
+ unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+ unsigned long vend = PAGE_ALIGN((unsigned long)end);
+
+ kasan_shallow_populate_pgd(vaddr, vend);
+ local_flush_tlb_all();
}
void __init kasan_init(void)
{
- struct memblock_region *reg;
- unsigned long i;
+ phys_addr_t p_start, p_end;
+ u64 i;
- kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
- (void *)kasan_mem_to_shadow((void *)
- VMALLOC_END));
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+ kasan_shallow_populate(
+ (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+ (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
- for_each_memblock(memory, reg) {
- void *start = (void *)__va(reg->base);
- void *end = (void *)__va(reg->base + reg->size);
+ /* Populate the linear mapping */
+ for_each_mem_range(i, &p_start, &p_end) {
+ void *start = (void *)__va(p_start);
+ void *end = (void *)__va(p_end);
if (start >= end)
break;
- populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
- };
+ kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
+ }
+
+ /* Populate kernel, BPF, modules mapping */
+ kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR),
+ kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G));
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(&kasan_early_shadow_pte[i],
@@ -108,6 +464,6 @@ void __init kasan_init(void)
__pgprot(_PAGE_PRESENT | _PAGE_READ |
_PAGE_ACCESSED)));
- memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+ memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
init_task.kasan_depth = 0;
}
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index ec2c70f84994..5e49e4b4a4cc 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -7,6 +7,7 @@
#include <linux/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/bitops.h>
+#include <asm/set_memory.h>
struct pageattr_masks {
pgprot_t set_mask;
@@ -94,7 +95,7 @@ static int pageattr_pte_hole(unsigned long addr, unsigned long next,
return 0;
}
-const static struct mm_walk_ops pageattr_ops = {
+static const struct mm_walk_ops pageattr_ops = {
.pgd_entry = pageattr_pgd_entry,
.p4d_entry = pageattr_p4d_entry,
.pud_entry = pageattr_pud_entry,
@@ -127,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
return ret;
}
+int set_memory_rw_nx(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE),
+ __pgprot(_PAGE_EXEC));
+}
+
int set_memory_ro(unsigned long addr, int numpages)
{
return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
@@ -151,6 +158,7 @@ int set_memory_nx(unsigned long addr, int numpages)
int set_direct_map_invalid_noflush(struct page *page)
{
+ int ret;
unsigned long start = (unsigned long)page_address(page);
unsigned long end = start + PAGE_SIZE;
struct pageattr_masks masks = {
@@ -158,11 +166,16 @@ int set_direct_map_invalid_noflush(struct page *page)
.clear_mask = __pgprot(_PAGE_PRESENT)
};
- return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+ mmap_read_lock(&init_mm);
+ ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+ mmap_read_unlock(&init_mm);
+
+ return ret;
}
int set_direct_map_default_noflush(struct page *page)
{
+ int ret;
unsigned long start = (unsigned long)page_address(page);
unsigned long end = start + PAGE_SIZE;
struct pageattr_masks masks = {
@@ -170,9 +183,14 @@ int set_direct_map_default_noflush(struct page *page)
.clear_mask = __pgprot(0)
};
- return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+ mmap_read_lock(&init_mm);
+ ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+ mmap_read_unlock(&init_mm);
+
+ return ret;
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
if (!debug_pagealloc_enabled())
@@ -185,3 +203,33 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
__set_memory((unsigned long)page_address(page), numpages,
__pgprot(0), __pgprot(_PAGE_PRESENT));
}
+#endif
+
+bool kernel_page_present(struct page *page)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+ pgd_t *pgd;
+ pud_t *pud;
+ p4d_t *p4d;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(addr);
+ if (!pgd_present(*pgd))
+ return false;
+
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d))
+ return false;
+
+ pud = pud_offset(p4d, addr);
+ if (!pud_present(*pud))
+ return false;
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ return false;
+
+ pte = pte_offset_kernel(pmd, addr);
+ return pte_present(*pte);
+}
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
index e8e4dcd39fed..19cf25a74ee2 100644
--- a/arch/riscv/mm/physaddr.c
+++ b/arch/riscv/mm/physaddr.c
@@ -8,12 +8,10 @@
phys_addr_t __virt_to_phys(unsigned long x)
{
- phys_addr_t y = x - PAGE_OFFSET;
-
/*
* Boundary checking aginst the kernel linear mapping space.
*/
- WARN(y >= KERN_VIRT_SIZE,
+ WARN(!is_linear_mapping(x) && !is_kernel_mapping(x),
"virt_to_phys used for non-linear address: %pK (%pS)\n",
(void *)x, (void *)x);
@@ -23,7 +21,7 @@ EXPORT_SYMBOL(__virt_to_phys);
phys_addr_t __phys_addr_symbol(unsigned long x)
{
- unsigned long kernel_start = (unsigned long)PAGE_OFFSET;
+ unsigned long kernel_start = kernel_map.virt_addr;
unsigned long kernel_end = (unsigned long)_end;
/*
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 0831c2e61a8f..830e7de65e3a 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -3,6 +3,7 @@
* Copyright (C) 2019 SiFive
*/
+#include <linux/efi.h>
#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
@@ -49,25 +50,82 @@ struct addr_marker {
const char *name;
};
+/* Private information for debugfs */
+struct ptd_mm_info {
+ struct mm_struct *mm;
+ const struct addr_marker *markers;
+ unsigned long base_addr;
+ unsigned long end;
+};
+
+enum address_markers_idx {
+#ifdef CONFIG_KASAN
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+#endif
+ FIXMAP_START_NR,
+ FIXMAP_END_NR,
+ PCI_IO_START_NR,
+ PCI_IO_END_NR,
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ VMEMMAP_START_NR,
+ VMEMMAP_END_NR,
+#endif
+ VMALLOC_START_NR,
+ VMALLOC_END_NR,
+ PAGE_OFFSET_NR,
+#ifdef CONFIG_64BIT
+ MODULES_MAPPING_NR,
+ KERNEL_MAPPING_NR,
+#endif
+ END_OF_SPACE_NR
+};
+
static struct addr_marker address_markers[] = {
#ifdef CONFIG_KASAN
- {KASAN_SHADOW_START, "Kasan shadow start"},
- {KASAN_SHADOW_END, "Kasan shadow end"},
+ {0, "Kasan shadow start"},
+ {0, "Kasan shadow end"},
#endif
- {FIXADDR_START, "Fixmap start"},
- {FIXADDR_TOP, "Fixmap end"},
- {PCI_IO_START, "PCI I/O start"},
- {PCI_IO_END, "PCI I/O end"},
+ {0, "Fixmap start"},
+ {0, "Fixmap end"},
+ {0, "PCI I/O start"},
+ {0, "PCI I/O end"},
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- {VMEMMAP_START, "vmemmap start"},
- {VMEMMAP_END, "vmemmap end"},
+ {0, "vmemmap start"},
+ {0, "vmemmap end"},
+#endif
+ {0, "vmalloc() area"},
+ {0, "vmalloc() end"},
+ {0, "Linear mapping"},
+#ifdef CONFIG_64BIT
+ {0, "Modules/BPF mapping"},
+ {0, "Kernel mapping"},
#endif
- {VMALLOC_START, "vmalloc() area"},
- {VMALLOC_END, "vmalloc() end"},
- {PAGE_OFFSET, "Linear mapping"},
{-1, NULL},
};
+static struct ptd_mm_info kernel_ptd_info = {
+ .mm = &init_mm,
+ .markers = address_markers,
+ .base_addr = 0,
+ .end = ULONG_MAX,
+};
+
+#ifdef CONFIG_EFI
+static struct addr_marker efi_addr_markers[] = {
+ { 0, "UEFI runtime start" },
+ { SZ_1G, "UEFI runtime end" },
+ { -1, NULL }
+};
+
+static struct ptd_mm_info efi_ptd_info = {
+ .mm = &efi_mm,
+ .markers = efi_addr_markers,
+ .base_addr = 0,
+ .end = SZ_2G,
+};
+#endif
+
/* Page Table Entry */
struct prot_bits {
u64 mask;
@@ -245,22 +303,22 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr,
}
}
-static void ptdump_walk(struct seq_file *s)
+static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
{
struct pg_state st = {
.seq = s,
- .marker = address_markers,
+ .marker = pinfo->markers,
.level = -1,
.ptdump = {
.note_page = note_page,
.range = (struct ptdump_range[]) {
- {KERN_VIRT_START, ULONG_MAX},
+ {pinfo->base_addr, pinfo->end},
{0, 0}
}
}
};
- ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+ ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL);
}
void ptdump_check_wx(void)
@@ -293,23 +351,50 @@ void ptdump_check_wx(void)
static int ptdump_show(struct seq_file *m, void *v)
{
- ptdump_walk(m);
+ ptdump_walk(m, m->private);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
-static int ptdump_init(void)
+static int __init ptdump_init(void)
{
unsigned int i, j;
+#ifdef CONFIG_KASAN
+ address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
+ address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
+#endif
+ address_markers[FIXMAP_START_NR].start_address = FIXADDR_START;
+ address_markers[FIXMAP_END_NR].start_address = FIXADDR_TOP;
+ address_markers[PCI_IO_START_NR].start_address = PCI_IO_START;
+ address_markers[PCI_IO_END_NR].start_address = PCI_IO_END;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+ address_markers[VMEMMAP_END_NR].start_address = VMEMMAP_END;
+#endif
+ address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
+ address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
+ address_markers[PAGE_OFFSET_NR].start_address = PAGE_OFFSET;
+#ifdef CONFIG_64BIT
+ address_markers[MODULES_MAPPING_NR].start_address = MODULES_VADDR;
+ address_markers[KERNEL_MAPPING_NR].start_address = kernel_map.virt_addr;
+#endif
+
+ kernel_ptd_info.base_addr = KERN_VIRT_START;
+
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
pg_level[i].mask |= pte_bits[j].mask;
- debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
+ debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info,
&ptdump_fops);
+#ifdef CONFIG_EFI
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
+ debugfs_create_file("efi_page_tables", 0400, NULL, &efi_ptd_info,
+ &ptdump_fops);
+#endif
return 0;
}
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 720b443c4528..37ed760d007c 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -4,36 +4,61 @@
#include <linux/smp.h>
#include <linux/sched.h>
#include <asm/sbi.h>
+#include <asm/mmu_context.h>
+
+static inline void local_flush_tlb_all_asid(unsigned long asid)
+{
+ __asm__ __volatile__ ("sfence.vma x0, %0"
+ :
+ : "r" (asid)
+ : "memory");
+}
+
+static inline void local_flush_tlb_page_asid(unsigned long addr,
+ unsigned long asid)
+{
+ __asm__ __volatile__ ("sfence.vma %0, %1"
+ :
+ : "r" (addr), "r" (asid)
+ : "memory");
+}
void flush_tlb_all(void)
{
sbi_remote_sfence_vma(NULL, 0, -1);
}
-/*
- * This function must not be called with cmask being null.
- * Kernel may panic if cmask is NULL.
- */
-static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,
- unsigned long size)
+static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
+ unsigned long size, unsigned long stride)
{
- struct cpumask hmask;
+ struct cpumask *cmask = mm_cpumask(mm);
unsigned int cpuid;
+ bool broadcast;
if (cpumask_empty(cmask))
return;
cpuid = get_cpu();
+ /* check if the tlbflush needs to be sent to other CPUs */
+ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
+ if (static_branch_unlikely(&use_asid_allocator)) {
+ unsigned long asid = atomic_long_read(&mm->context.id);
- if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
- /* local cpu is the only cpu present in cpumask */
- if (size <= PAGE_SIZE)
+ if (broadcast) {
+ sbi_remote_sfence_vma_asid(cmask, start, size, asid);
+ } else if (size <= stride) {
+ local_flush_tlb_page_asid(start, asid);
+ } else {
+ local_flush_tlb_all_asid(asid);
+ }
+ } else {
+ if (broadcast) {
+ sbi_remote_sfence_vma(cmask, start, size);
+ } else if (size <= stride) {
local_flush_tlb_page(start);
- else
+ } else {
local_flush_tlb_all();
- } else {
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size);
+ }
}
put_cpu();
@@ -41,16 +66,23 @@ static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,
void flush_tlb_mm(struct mm_struct *mm)
{
- __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1);
+ __sbi_tlb_flush_range(mm, 0, -1, PAGE_SIZE);
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
{
- __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE);
+ __sbi_tlb_flush_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE);
}
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
- __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start);
+ __sbi_tlb_flush_range(vma->vm_mm, start, end - start, PAGE_SIZE);
+}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ __sbi_tlb_flush_range(vma->vm_mm, start, end - start, PMD_SIZE);
}
+#endif
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index 20e235d06f66..2a3715bf29fe 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -13,6 +13,11 @@
#include <linux/filter.h>
#include <asm/cacheflush.h>
+static inline bool rvc_enabled(void)
+{
+ return IS_ENABLED(CONFIG_RISCV_ISA_C);
+}
+
enum {
RV_REG_ZERO = 0, /* The constant value 0 */
RV_REG_RA = 1, /* Return address */
@@ -48,16 +53,35 @@ enum {
RV_REG_T6 = 31,
};
+static inline bool is_creg(u8 reg)
+{
+ return (1 << reg) & (BIT(RV_REG_FP) |
+ BIT(RV_REG_S1) |
+ BIT(RV_REG_A0) |
+ BIT(RV_REG_A1) |
+ BIT(RV_REG_A2) |
+ BIT(RV_REG_A3) |
+ BIT(RV_REG_A4) |
+ BIT(RV_REG_A5));
+}
+
struct rv_jit_context {
struct bpf_prog *prog;
- u32 *insns; /* RV insns */
+ u16 *insns; /* RV insns */
int ninsns;
int epilogue_offset;
int *offset; /* BPF to RV */
+ int nexentries;
unsigned long flags;
int stack_size;
};
+/* Convert from ninsns to bytes. */
+static inline int ninsns_rvoff(int ninsns)
+{
+ return ninsns << 1;
+}
+
struct rv_jit_data {
struct bpf_binary_header *header;
u8 *image;
@@ -74,8 +98,22 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
+/* Emit a 4-byte riscv instruction. */
static inline void emit(const u32 insn, struct rv_jit_context *ctx)
{
+ if (ctx->insns) {
+ ctx->insns[ctx->ninsns] = insn;
+ ctx->insns[ctx->ninsns + 1] = (insn >> 16);
+ }
+
+ ctx->ninsns += 2;
+}
+
+/* Emit a 2-byte riscv compressed instruction. */
+static inline void emitc(const u16 insn, struct rv_jit_context *ctx)
+{
+ BUILD_BUG_ON(!rvc_enabled());
+
if (ctx->insns)
ctx->insns[ctx->ninsns] = insn;
@@ -86,7 +124,7 @@ static inline int epilogue_offset(struct rv_jit_context *ctx)
{
int to = ctx->epilogue_offset, from = ctx->ninsns;
- return (to - from) << 2;
+ return ninsns_rvoff(to - from);
}
/* Return -1 or inverted cond. */
@@ -117,6 +155,36 @@ static inline int invert_bpf_cond(u8 cond)
return -1;
}
+static inline bool is_6b_int(long val)
+{
+ return -(1L << 5) <= val && val < (1L << 5);
+}
+
+static inline bool is_7b_uint(unsigned long val)
+{
+ return val < (1UL << 7);
+}
+
+static inline bool is_8b_uint(unsigned long val)
+{
+ return val < (1UL << 8);
+}
+
+static inline bool is_9b_uint(unsigned long val)
+{
+ return val < (1UL << 9);
+}
+
+static inline bool is_10b_int(long val)
+{
+ return -(1L << 9) <= val && val < (1L << 9);
+}
+
+static inline bool is_10b_uint(unsigned long val)
+{
+ return val < (1UL << 10);
+}
+
static inline bool is_12b_int(long val)
{
return -(1L << 11) <= val && val < (1L << 11);
@@ -149,7 +217,7 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
off++; /* BPF branch is from PC+1, RV is from PC */
from = (insn > 0) ? ctx->offset[insn - 1] : 0;
to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
- return (to - from) << 2;
+ return ninsns_rvoff(to - from);
}
/* Instruction formats. */
@@ -207,6 +275,59 @@ static inline u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1,
return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode);
}
+/* RISC-V compressed instruction formats. */
+
+static inline u16 rv_cr_insn(u8 funct4, u8 rd, u8 rs2, u8 op)
+{
+ return (funct4 << 12) | (rd << 7) | (rs2 << 2) | op;
+}
+
+static inline u16 rv_ci_insn(u8 funct3, u32 imm6, u8 rd, u8 op)
+{
+ u32 imm;
+
+ imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2);
+ return (funct3 << 13) | (rd << 7) | op | imm;
+}
+
+static inline u16 rv_css_insn(u8 funct3, u32 uimm, u8 rs2, u8 op)
+{
+ return (funct3 << 13) | (uimm << 7) | (rs2 << 2) | op;
+}
+
+static inline u16 rv_ciw_insn(u8 funct3, u32 uimm, u8 rd, u8 op)
+{
+ return (funct3 << 13) | (uimm << 5) | ((rd & 0x7) << 2) | op;
+}
+
+static inline u16 rv_cl_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rd,
+ u8 op)
+{
+ return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) |
+ (imm_lo << 5) | ((rd & 0x7) << 2) | op;
+}
+
+static inline u16 rv_cs_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rs2,
+ u8 op)
+{
+ return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) |
+ (imm_lo << 5) | ((rs2 & 0x7) << 2) | op;
+}
+
+static inline u16 rv_ca_insn(u8 funct6, u8 rd, u8 funct2, u8 rs2, u8 op)
+{
+ return (funct6 << 10) | ((rd & 0x7) << 7) | (funct2 << 5) |
+ ((rs2 & 0x7) << 2) | op;
+}
+
+static inline u16 rv_cb_insn(u8 funct3, u32 imm6, u8 funct2, u8 rd, u8 op)
+{
+ u32 imm;
+
+ imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2);
+ return (funct3 << 13) | (funct2 << 10) | ((rd & 0x7) << 7) | op | imm;
+}
+
/* Instructions shared by both RV32 and RV64. */
static inline u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0)
@@ -414,6 +535,172 @@ static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
}
+static inline u32 rv_amoand_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0xc, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_amoor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x8, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_amoxor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x4, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_amoswap_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x1, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_lr_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x2, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_sc_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x3, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_fence(u8 pred, u8 succ)
+{
+ u16 imm11_0 = pred << 4 | succ;
+
+ return rv_i_insn(imm11_0, 0, 0, 0, 0xf);
+}
+
+/* RVC instrutions. */
+
+static inline u16 rvc_addi4spn(u8 rd, u32 imm10)
+{
+ u32 imm;
+
+ imm = ((imm10 & 0x30) << 2) | ((imm10 & 0x3c0) >> 4) |
+ ((imm10 & 0x4) >> 1) | ((imm10 & 0x8) >> 3);
+ return rv_ciw_insn(0x0, imm, rd, 0x0);
+}
+
+static inline u16 rvc_lw(u8 rd, u32 imm7, u8 rs1)
+{
+ u32 imm_hi, imm_lo;
+
+ imm_hi = (imm7 & 0x38) >> 3;
+ imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6);
+ return rv_cl_insn(0x2, imm_hi, rs1, imm_lo, rd, 0x0);
+}
+
+static inline u16 rvc_sw(u8 rs1, u32 imm7, u8 rs2)
+{
+ u32 imm_hi, imm_lo;
+
+ imm_hi = (imm7 & 0x38) >> 3;
+ imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6);
+ return rv_cs_insn(0x6, imm_hi, rs1, imm_lo, rs2, 0x0);
+}
+
+static inline u16 rvc_addi(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_li(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0x2, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_addi16sp(u32 imm10)
+{
+ u32 imm;
+
+ imm = ((imm10 & 0x200) >> 4) | (imm10 & 0x10) | ((imm10 & 0x40) >> 3) |
+ ((imm10 & 0x180) >> 6) | ((imm10 & 0x20) >> 5);
+ return rv_ci_insn(0x3, imm, RV_REG_SP, 0x1);
+}
+
+static inline u16 rvc_lui(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0x3, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_srli(u8 rd, u32 imm6)
+{
+ return rv_cb_insn(0x4, imm6, 0, rd, 0x1);
+}
+
+static inline u16 rvc_srai(u8 rd, u32 imm6)
+{
+ return rv_cb_insn(0x4, imm6, 0x1, rd, 0x1);
+}
+
+static inline u16 rvc_andi(u8 rd, u32 imm6)
+{
+ return rv_cb_insn(0x4, imm6, 0x2, rd, 0x1);
+}
+
+static inline u16 rvc_sub(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x23, rd, 0, rs, 0x1);
+}
+
+static inline u16 rvc_xor(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x23, rd, 0x1, rs, 0x1);
+}
+
+static inline u16 rvc_or(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x23, rd, 0x2, rs, 0x1);
+}
+
+static inline u16 rvc_and(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x23, rd, 0x3, rs, 0x1);
+}
+
+static inline u16 rvc_slli(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0, imm6, rd, 0x2);
+}
+
+static inline u16 rvc_lwsp(u8 rd, u32 imm8)
+{
+ u32 imm;
+
+ imm = ((imm8 & 0xc0) >> 6) | (imm8 & 0x3c);
+ return rv_ci_insn(0x2, imm, rd, 0x2);
+}
+
+static inline u16 rvc_jr(u8 rs1)
+{
+ return rv_cr_insn(0x8, rs1, RV_REG_ZERO, 0x2);
+}
+
+static inline u16 rvc_mv(u8 rd, u8 rs)
+{
+ return rv_cr_insn(0x8, rd, rs, 0x2);
+}
+
+static inline u16 rvc_jalr(u8 rs1)
+{
+ return rv_cr_insn(0x9, rs1, RV_REG_ZERO, 0x2);
+}
+
+static inline u16 rvc_add(u8 rd, u8 rs)
+{
+ return rv_cr_insn(0x9, rd, rs, 0x2);
+}
+
+static inline u16 rvc_swsp(u32 imm8, u8 rs2)
+{
+ u32 imm;
+
+ imm = (imm8 & 0x3c) | ((imm8 & 0xc0) >> 6);
+ return rv_css_insn(0x6, imm, rs2, 0x2);
+}
+
/*
* RV64-only instructions.
*
@@ -503,6 +790,264 @@ static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
}
+static inline u32 rv_amoand_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0xc, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_amoor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x8, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_amoxor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x4, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_amoswap_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x1, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_lr_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x2, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_sc_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x3, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+/* RV64-only RVC instructions. */
+
+static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1)
+{
+ u32 imm_hi, imm_lo;
+
+ imm_hi = (imm8 & 0x38) >> 3;
+ imm_lo = (imm8 & 0xc0) >> 6;
+ return rv_cl_insn(0x3, imm_hi, rs1, imm_lo, rd, 0x0);
+}
+
+static inline u16 rvc_sd(u8 rs1, u32 imm8, u8 rs2)
+{
+ u32 imm_hi, imm_lo;
+
+ imm_hi = (imm8 & 0x38) >> 3;
+ imm_lo = (imm8 & 0xc0) >> 6;
+ return rv_cs_insn(0x7, imm_hi, rs1, imm_lo, rs2, 0x0);
+}
+
+static inline u16 rvc_subw(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x27, rd, 0, rs, 0x1);
+}
+
+static inline u16 rvc_addiw(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0x1, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_ldsp(u8 rd, u32 imm9)
+{
+ u32 imm;
+
+ imm = ((imm9 & 0x1c0) >> 6) | (imm9 & 0x38);
+ return rv_ci_insn(0x3, imm, rd, 0x2);
+}
+
+static inline u16 rvc_sdsp(u32 imm9, u8 rs2)
+{
+ u32 imm;
+
+ imm = (imm9 & 0x38) | ((imm9 & 0x1c0) >> 6);
+ return rv_css_insn(0x7, imm, rs2, 0x2);
+}
+
+#endif /* __riscv_xlen == 64 */
+
+/* Helper functions that emit RVC instructions when possible. */
+
+static inline void emit_jalr(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd == RV_REG_RA && rs && !imm)
+ emitc(rvc_jalr(rs), ctx);
+ else if (rvc_enabled() && !rd && rs && !imm)
+ emitc(rvc_jr(rs), ctx);
+ else
+ emit(rv_jalr(rd, rs, imm), ctx);
+}
+
+static inline void emit_mv(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rs)
+ emitc(rvc_mv(rd, rs), ctx);
+ else
+ emit(rv_addi(rd, rs, 0), ctx);
+}
+
+static inline void emit_add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rd == rs1 && rs2)
+ emitc(rvc_add(rd, rs2), ctx);
+ else
+ emit(rv_add(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_addi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd == RV_REG_SP && rd == rs && is_10b_int(imm) && imm && !(imm & 0xf))
+ emitc(rvc_addi16sp(imm), ctx);
+ else if (rvc_enabled() && is_creg(rd) && rs == RV_REG_SP && is_10b_uint(imm) &&
+ !(imm & 0x3) && imm)
+ emitc(rvc_addi4spn(rd, imm), ctx);
+ else if (rvc_enabled() && rd && rd == rs && imm && is_6b_int(imm))
+ emitc(rvc_addi(rd, imm), ctx);
+ else
+ emit(rv_addi(rd, rs, imm), ctx);
+}
+
+static inline void emit_li(u8 rd, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && is_6b_int(imm))
+ emitc(rvc_li(rd, imm), ctx);
+ else
+ emit(rv_addi(rd, RV_REG_ZERO, imm), ctx);
+}
+
+static inline void emit_lui(u8 rd, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rd != RV_REG_SP && is_6b_int(imm) && imm)
+ emitc(rvc_lui(rd, imm), ctx);
+ else
+ emit(rv_lui(rd, imm), ctx);
+}
+
+static inline void emit_slli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rd == rs && imm && (u32)imm < __riscv_xlen)
+ emitc(rvc_slli(rd, imm), ctx);
+ else
+ emit(rv_slli(rd, rs, imm), ctx);
+}
+
+static inline void emit_andi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs && is_6b_int(imm))
+ emitc(rvc_andi(rd, imm), ctx);
+ else
+ emit(rv_andi(rd, rs, imm), ctx);
+}
+
+static inline void emit_srli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen)
+ emitc(rvc_srli(rd, imm), ctx);
+ else
+ emit(rv_srli(rd, rs, imm), ctx);
+}
+
+static inline void emit_srai(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen)
+ emitc(rvc_srai(rd, imm), ctx);
+ else
+ emit(rv_srai(rd, rs, imm), ctx);
+}
+
+static inline void emit_sub(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_sub(rd, rs2), ctx);
+ else
+ emit(rv_sub(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_or(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_or(rd, rs2), ctx);
+ else
+ emit(rv_or(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_and(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_and(rd, rs2), ctx);
+ else
+ emit(rv_and(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_xor(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_xor(rd, rs2), ctx);
+ else
+ emit(rv_xor(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_lw(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_8b_uint(off) && !(off & 0x3))
+ emitc(rvc_lwsp(rd, off), ctx);
+ else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_7b_uint(off) && !(off & 0x3))
+ emitc(rvc_lw(rd, off, rs1), ctx);
+ else
+ emit(rv_lw(rd, off, rs1), ctx);
+}
+
+static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rs1 == RV_REG_SP && is_8b_uint(off) && !(off & 0x3))
+ emitc(rvc_swsp(off, rs2), ctx);
+ else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_7b_uint(off) && !(off & 0x3))
+ emitc(rvc_sw(rs1, off, rs2), ctx);
+ else
+ emit(rv_sw(rs1, off, rs2), ctx);
+}
+
+/* RV64-only helper functions. */
+#if __riscv_xlen == 64
+
+static inline void emit_addiw(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rd == rs && is_6b_int(imm))
+ emitc(rvc_addiw(rd, imm), ctx);
+ else
+ emit(rv_addiw(rd, rs, imm), ctx);
+}
+
+static inline void emit_ld(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_9b_uint(off) && !(off & 0x7))
+ emitc(rvc_ldsp(rd, off), ctx);
+ else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_8b_uint(off) && !(off & 0x7))
+ emitc(rvc_ld(rd, off, rs1), ctx);
+ else
+ emit(rv_ld(rd, off, rs1), ctx);
+}
+
+static inline void emit_sd(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rs1 == RV_REG_SP && is_9b_uint(off) && !(off & 0x7))
+ emitc(rvc_sdsp(off, rs2), ctx);
+ else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_8b_uint(off) && !(off & 0x7))
+ emitc(rvc_sd(rs1, off, rs2), ctx);
+ else
+ emit(rv_sd(rs1, off, rs2), ctx);
+}
+
+static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_subw(rd, rs2), ctx);
+ else
+ emit(rv_subw(rd, rs1, rs2), ctx);
+}
+
#endif /* __riscv_xlen == 64 */
void bpf_jit_build_prologue(struct rv_jit_context *ctx);
diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index b198eaa74456..529a83b85c1c 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -644,7 +644,7 @@ static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 rvoff,
e = ctx->ninsns;
/* Adjust for extra insns. */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
return 0;
}
@@ -713,7 +713,7 @@ static int emit_bcc(u8 op, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx)
if (far) {
e = ctx->ninsns;
/* Adjust for extra insns. */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
}
return 0;
@@ -731,7 +731,7 @@ static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 rvoff,
e = ctx->ninsns;
/* Adjust for extra insns. */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx))
return -1;
@@ -795,16 +795,15 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
* if (index >= max_entries)
* goto out;
*/
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx);
/*
- * temp_tcc = tcc - 1;
- * if (tcc < 0)
+ * if (--tcc < 0)
* goto out;
*/
- emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ emit(rv_addi(RV_REG_TCC, RV_REG_TCC, -1), ctx);
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
/*
@@ -818,7 +817,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
if (is_12b_check(off, insn))
return -1;
emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx);
/*
@@ -829,7 +828,6 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
if (is_12b_check(off, insn))
return -1;
emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
- emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
/* Epilogue jumps to *(t0 + 4). */
__build_epilogue(true, ctx);
return 0;
@@ -881,7 +879,7 @@ static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
- if (mode == BPF_XADD && size != BPF_W)
+ if (mode == BPF_ATOMIC && size != BPF_W)
return -1;
emit_imm(RV_REG_T0, off, ctx);
@@ -899,7 +897,7 @@ static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
case BPF_MEM:
emit(rv_sw(RV_REG_T0, 0, lo(rs)), ctx);
break;
- case BPF_XADD:
+ case BPF_ATOMIC: /* Only BPF_ADD supported */
emit(rv_amoadd_w(RV_REG_ZERO, lo(rs), RV_REG_T0, 0, 0),
ctx);
break;
@@ -1020,7 +1018,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_zext64(dst, ctx);
break;
}
- /* Fallthrough. */
+ fallthrough;
case BPF_ALU | BPF_ADD | BPF_X:
case BPF_ALU | BPF_SUB | BPF_X:
@@ -1079,7 +1077,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case 16:
emit(rv_slli(lo(rd), lo(rd), 16), ctx);
emit(rv_srli(lo(rd), lo(rd), 16), ctx);
- /* Fallthrough. */
+ fallthrough;
case 32:
if (!ctx->prog->aux->verifier_zext)
emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
@@ -1214,7 +1212,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_imm32(tmp2, imm, ctx);
src = tmp2;
e = ctx->ninsns;
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
}
if (is64)
@@ -1251,6 +1249,10 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
return -1;
break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+
case BPF_ST | BPF_MEM | BPF_B:
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_W:
@@ -1260,7 +1262,6 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_STX | BPF_MEM | BPF_H:
case BPF_STX | BPF_MEM | BPF_W:
case BPF_STX | BPF_MEM | BPF_DW:
- case BPF_STX | BPF_XADD | BPF_W:
if (BPF_CLASS(code) == BPF_ST) {
emit_imm32(tmp2, imm, ctx);
src = tmp2;
@@ -1271,8 +1272,21 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
return -1;
break;
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ if (insn->imm != BPF_ADD) {
+ pr_info_once(
+ "bpf-jit: not supported: atomic operation %02x ***\n",
+ insn->imm);
+ return -EFAULT;
+ }
+
+ if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code),
+ BPF_MODE(code)))
+ return -1;
+ break;
+
/* No hardware support for 8-byte atomics in RV32. */
- case BPF_STX | BPF_XADD | BPF_DW:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
/* Fallthrough. */
notsupported:
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 6cfd164cbe88..00df3a8f92ac 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -5,6 +5,7 @@
*
*/
+#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include "bpf_jit.h"
@@ -27,6 +28,21 @@ static const int regmap[] = {
[BPF_REG_AX] = RV_REG_T0,
};
+static const int pt_regmap[] = {
+ [RV_REG_A0] = offsetof(struct pt_regs, a0),
+ [RV_REG_A1] = offsetof(struct pt_regs, a1),
+ [RV_REG_A2] = offsetof(struct pt_regs, a2),
+ [RV_REG_A3] = offsetof(struct pt_regs, a3),
+ [RV_REG_A4] = offsetof(struct pt_regs, a4),
+ [RV_REG_A5] = offsetof(struct pt_regs, a5),
+ [RV_REG_S1] = offsetof(struct pt_regs, s1),
+ [RV_REG_S2] = offsetof(struct pt_regs, s2),
+ [RV_REG_S3] = offsetof(struct pt_regs, s3),
+ [RV_REG_S4] = offsetof(struct pt_regs, s4),
+ [RV_REG_S5] = offsetof(struct pt_regs, s5),
+ [RV_REG_T0] = offsetof(struct pt_regs, t0),
+};
+
enum {
RV_CTX_F_SEEN_TAIL_CALL = 0,
RV_CTX_F_SEEN_CALL = RV_REG_RA,
@@ -132,19 +148,23 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
*
* This also means that we need to process LSB to MSB.
*/
- s64 upper = (val + (1 << 11)) >> 12, lower = val & 0xfff;
+ s64 upper = (val + (1 << 11)) >> 12;
+ /* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw,
+ * and addi are signed and RVC checks will perform signed comparisons.
+ */
+ s64 lower = ((val & 0xfff) << 52) >> 52;
int shift;
if (is_32b_int(val)) {
if (upper)
- emit(rv_lui(rd, upper), ctx);
+ emit_lui(rd, upper, ctx);
if (!upper) {
- emit(rv_addi(rd, RV_REG_ZERO, lower), ctx);
+ emit_li(rd, lower, ctx);
return;
}
- emit(rv_addiw(rd, rd, lower), ctx);
+ emit_addiw(rd, rd, lower, ctx);
return;
}
@@ -154,9 +174,9 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
emit_imm(rd, upper, ctx);
- emit(rv_slli(rd, rd, shift), ctx);
+ emit_slli(rd, rd, shift, ctx);
if (lower)
- emit(rv_addi(rd, rd, lower), ctx);
+ emit_addi(rd, rd, lower, ctx);
}
static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
@@ -164,43 +184,43 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
if (seen_reg(RV_REG_RA, ctx)) {
- emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
- emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
if (seen_reg(RV_REG_S1, ctx)) {
- emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S2, ctx)) {
- emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S3, ctx)) {
- emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S4, ctx)) {
- emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S5, ctx)) {
- emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S6, ctx)) {
- emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
- emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
+ emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
/* Set return value. */
if (!is_tail_call)
- emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
- emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
- is_tail_call ? 4 : 0), /* skip TCC init */
- ctx);
+ emit_mv(RV_REG_A0, RV_REG_A5, ctx);
+ emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
+ is_tail_call ? 4 : 0, /* skip TCC init */
+ ctx);
}
static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
@@ -280,8 +300,8 @@ static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
{
- emit(rv_slli(reg, reg, 32), ctx);
- emit(rv_srli(reg, reg, 32), ctx);
+ emit_slli(reg, reg, 32, ctx);
+ emit_srli(reg, reg, 32, ctx);
}
static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
@@ -304,35 +324,34 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
if (is_12b_check(off, insn))
return -1;
emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
- /* if (TCC-- < 0)
+ /* if (--TCC < 0)
* goto out;
*/
- emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
- emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
+ emit_addi(RV_REG_TCC, tcc, -1, ctx);
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
+ emit_branch(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
/* prog = array->ptrs[index];
* if (!prog)
* goto out;
*/
- emit(rv_slli(RV_REG_T2, RV_REG_A2, 3), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_A1), ctx);
+ emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx);
off = offsetof(struct bpf_array, ptrs);
if (is_12b_check(off, insn))
return -1;
- emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ emit_ld(RV_REG_T2, off, RV_REG_T2, ctx);
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
/* goto *(prog->bpf_func + 4); */
off = offsetof(struct bpf_prog, bpf_func);
if (is_12b_check(off, insn))
return -1;
- emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
- emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
+ emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
__build_epilogue(true, ctx);
return 0;
}
@@ -360,9 +379,9 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
- emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
+ emit_mv(RV_REG_T2, *rd, ctx);
emit_zext_32(RV_REG_T2, ctx);
- emit(rv_addi(RV_REG_T1, *rs, 0), ctx);
+ emit_mv(RV_REG_T1, *rs, ctx);
emit_zext_32(RV_REG_T1, ctx);
*rd = RV_REG_T2;
*rs = RV_REG_T1;
@@ -370,15 +389,15 @@ static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
- emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
- emit(rv_addiw(RV_REG_T1, *rs, 0), ctx);
+ emit_addiw(RV_REG_T2, *rd, 0, ctx);
+ emit_addiw(RV_REG_T1, *rs, 0, ctx);
*rd = RV_REG_T2;
*rs = RV_REG_T1;
}
static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
{
- emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
+ emit_mv(RV_REG_T2, *rd, ctx);
emit_zext_32(RV_REG_T2, ctx);
emit_zext_32(RV_REG_T1, ctx);
*rd = RV_REG_T2;
@@ -386,7 +405,7 @@ static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
{
- emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
+ emit_addiw(RV_REG_T2, *rd, 0, ctx);
*rd = RV_REG_T2;
}
@@ -432,7 +451,155 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
if (ret)
return ret;
rd = bpf_to_rv_reg(BPF_REG_0, ctx);
- emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+ emit_mv(rd, RV_REG_A0, ctx);
+ return 0;
+}
+
+static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
+ struct rv_jit_context *ctx)
+{
+ u8 r0;
+ int jmp_offset;
+
+ if (off) {
+ if (is_12b_int(off)) {
+ emit_addi(RV_REG_T1, rd, off, ctx);
+ } else {
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ }
+ rd = RV_REG_T1;
+ }
+
+ switch (imm) {
+ /* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */
+ case BPF_ADD:
+ emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) :
+ rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ break;
+ case BPF_AND:
+ emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) :
+ rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ break;
+ case BPF_OR:
+ emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) :
+ rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ break;
+ case BPF_XOR:
+ emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) :
+ rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ break;
+ /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
+ case BPF_ADD | BPF_FETCH:
+ emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) :
+ rv_amoadd_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ case BPF_AND | BPF_FETCH:
+ emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) :
+ rv_amoand_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ case BPF_OR | BPF_FETCH:
+ emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) :
+ rv_amoor_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ case BPF_XOR | BPF_FETCH:
+ emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) :
+ rv_amoxor_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
+ case BPF_XCHG:
+ emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) :
+ rv_amoswap_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
+ case BPF_CMPXCHG:
+ r0 = bpf_to_rv_reg(BPF_REG_0, ctx);
+ emit(is64 ? rv_addi(RV_REG_T2, r0, 0) :
+ rv_addiw(RV_REG_T2, r0, 0), ctx);
+ emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) :
+ rv_lr_w(r0, 0, rd, 0, 0), ctx);
+ jmp_offset = ninsns_rvoff(8);
+ emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
+ emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) :
+ rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx);
+ jmp_offset = ninsns_rvoff(-6);
+ emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
+ emit(rv_fence(0x3, 0x3), ctx);
+ break;
+ }
+}
+
+#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
+#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
+
+bool ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
+ int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
+
+ *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
+ regs->epc = (unsigned long)&ex->fixup - offset;
+
+ return true;
+}
+
+/* For accesses to BTF pointers, add an entry to the exception table */
+static int add_exception_handler(const struct bpf_insn *insn,
+ struct rv_jit_context *ctx,
+ int dst_reg, int insn_len)
+{
+ struct exception_table_entry *ex;
+ unsigned long pc;
+ off_t offset;
+
+ if (!ctx->insns || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM)
+ return 0;
+
+ if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(insn_len > ctx->ninsns))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(!rvc_enabled() && insn_len == 1))
+ return -EINVAL;
+
+ ex = &ctx->prog->aux->extable[ctx->nexentries];
+ pc = (unsigned long)&ctx->insns[ctx->ninsns - insn_len];
+
+ offset = pc - (long)&ex->insn;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex->insn = offset;
+
+ /*
+ * Since the extable follows the program, the fixup offset is always
+ * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
+ * to keep things simple, and put the destination register in the upper
+ * bits. We don't need to worry about buildtime or runtime sort
+ * modifying the upper bits because the table is already sorted, and
+ * isn't part of the main exception table.
+ */
+ offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
+ if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+ return -ERANGE;
+
+ ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
+ FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+ ex->type = EX_TYPE_BPF;
+
+ ctx->nexentries++;
return 0;
}
@@ -458,7 +625,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_zext_32(rd, ctx);
break;
}
- emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx);
+ emit_mv(rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -466,31 +633,35 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* dst = dst OP src */
case BPF_ALU | BPF_ADD | BPF_X:
case BPF_ALU64 | BPF_ADD | BPF_X:
- emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx);
+ emit_add(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_X:
case BPF_ALU64 | BPF_SUB | BPF_X:
- emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx);
+ if (is64)
+ emit_sub(rd, rd, rs, ctx);
+ else
+ emit_subw(rd, rd, rs, ctx);
+
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_AND | BPF_X:
- emit(rv_and(rd, rd, rs), ctx);
+ emit_and(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
- emit(rv_or(rd, rd, rs), ctx);
+ emit_or(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
- emit(rv_xor(rd, rd, rs), ctx);
+ emit_xor(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -534,8 +705,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* dst = -dst */
case BPF_ALU | BPF_NEG:
case BPF_ALU64 | BPF_NEG:
- emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) :
- rv_subw(rd, RV_REG_ZERO, rd), ctx);
+ emit_sub(rd, RV_REG_ZERO, rd, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -544,8 +714,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16:
- emit(rv_slli(rd, rd, 48), ctx);
- emit(rv_srli(rd, rd, 48), ctx);
+ emit_slli(rd, rd, 48, ctx);
+ emit_srli(rd, rd, 48, ctx);
break;
case 32:
if (!aux->verifier_zext)
@@ -558,51 +728,51 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
break;
case BPF_ALU | BPF_END | BPF_FROM_BE:
- emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx);
+ emit_li(RV_REG_T2, 0, ctx);
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
if (imm == 16)
goto out_be;
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
if (imm == 32)
goto out_be;
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
-
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
-
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
-
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
out_be:
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit(rv_addi(rd, RV_REG_T2, 0), ctx);
+ emit_mv(rd, RV_REG_T2, ctx);
break;
/* dst = imm */
@@ -617,12 +787,10 @@ out_be:
case BPF_ALU | BPF_ADD | BPF_K:
case BPF_ALU64 | BPF_ADD | BPF_K:
if (is_12b_int(imm)) {
- emit(is64 ? rv_addi(rd, rd, imm) :
- rv_addiw(rd, rd, imm), ctx);
+ emit_addi(rd, rd, imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(is64 ? rv_add(rd, rd, RV_REG_T1) :
- rv_addw(rd, rd, RV_REG_T1), ctx);
+ emit_add(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -630,12 +798,10 @@ out_be:
case BPF_ALU | BPF_SUB | BPF_K:
case BPF_ALU64 | BPF_SUB | BPF_K:
if (is_12b_int(-imm)) {
- emit(is64 ? rv_addi(rd, rd, -imm) :
- rv_addiw(rd, rd, -imm), ctx);
+ emit_addi(rd, rd, -imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(is64 ? rv_sub(rd, rd, RV_REG_T1) :
- rv_subw(rd, rd, RV_REG_T1), ctx);
+ emit_sub(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -643,10 +809,10 @@ out_be:
case BPF_ALU | BPF_AND | BPF_K:
case BPF_ALU64 | BPF_AND | BPF_K:
if (is_12b_int(imm)) {
- emit(rv_andi(rd, rd, imm), ctx);
+ emit_andi(rd, rd, imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(rv_and(rd, rd, RV_REG_T1), ctx);
+ emit_and(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -657,7 +823,7 @@ out_be:
emit(rv_ori(rd, rd, imm), ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(rv_or(rd, rd, RV_REG_T1), ctx);
+ emit_or(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -668,7 +834,7 @@ out_be:
emit(rv_xori(rd, rd, imm), ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(rv_xor(rd, rd, RV_REG_T1), ctx);
+ emit_xor(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -699,19 +865,28 @@ out_be:
break;
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU64 | BPF_LSH | BPF_K:
- emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx);
+ emit_slli(rd, rd, imm, ctx);
+
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_RSH | BPF_K:
case BPF_ALU64 | BPF_RSH | BPF_K:
- emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx);
+ if (is64)
+ emit_srli(rd, rd, imm, ctx);
+ else
+ emit(rv_srliw(rd, rd, imm), ctx);
+
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_ARSH | BPF_K:
case BPF_ALU64 | BPF_ARSH | BPF_K:
- emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx);
+ if (is64)
+ emit_srai(rd, rd, imm, ctx);
+ else
+ emit(rv_sraiw(rd, rd, imm), ctx);
+
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -757,13 +932,13 @@ out_be:
e = ctx->ninsns;
/* Adjust for extra insns */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
}
if (BPF_OP(code) == BPF_JSET) {
/* Adjust for and */
rvoff -= 4;
- emit(rv_and(RV_REG_T1, rd, rs), ctx);
+ emit_and(RV_REG_T1, rd, rs, ctx);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
ctx);
} else {
@@ -810,7 +985,7 @@ out_be:
e = ctx->ninsns;
/* Adjust for extra insns */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
break;
@@ -819,19 +994,19 @@ out_be:
rvoff = rv_offset(i, off, ctx);
s = ctx->ninsns;
if (is_12b_int(imm)) {
- emit(rv_andi(RV_REG_T1, rd, imm), ctx);
+ emit_andi(RV_REG_T1, rd, imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
+ emit_and(RV_REG_T1, rd, RV_REG_T1, ctx);
}
/* For jset32, we should clear the upper 32 bits of t1, but
* sign-extension is sufficient here and saves one instruction,
* as t1 is used only in comparison against zero.
*/
if (!is64 && imm < 0)
- emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx);
+ emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx);
e = ctx->ninsns;
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
break;
@@ -881,50 +1056,88 @@ out_be:
/* LDX: dst = *(size *)(src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- if (is_12b_int(off)) {
- emit(rv_lbu(rd, off, rs), ctx);
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ {
+ int insn_len, insns_start;
+
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit(rv_lbu(rd, off, rs), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ break;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
- }
+ case BPF_H:
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit(rv_lhu(rd, off, rs), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ break;
+ }
- emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
- emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
- if (insn_is_zext(&insn[1]))
- return 1;
- break;
- case BPF_LDX | BPF_MEM | BPF_H:
- if (is_12b_int(off)) {
- emit(rv_lhu(rd, off, rs), ctx);
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
- }
+ case BPF_W:
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit(rv_lwu(rd, off, rs), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ break;
+ }
- emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
- emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
- if (insn_is_zext(&insn[1]))
- return 1;
- break;
- case BPF_LDX | BPF_MEM | BPF_W:
- if (is_12b_int(off)) {
- emit(rv_lwu(rd, off, rs), ctx);
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
- }
+ case BPF_DW:
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit_ld(rd, off, rs, ctx);
+ insn_len = ctx->ninsns - insns_start;
+ break;
+ }
- emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
- emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
- if (insn_is_zext(&insn[1]))
- return 1;
- break;
- case BPF_LDX | BPF_MEM | BPF_DW:
- if (is_12b_int(off)) {
- emit(rv_ld(rd, off, rs), ctx);
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit_ld(rd, 0, RV_REG_T1, ctx);
+ insn_len = ctx->ninsns - insns_start;
break;
}
- emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
- emit(rv_ld(rd, 0, RV_REG_T1), ctx);
+ ret = add_exception_handler(insn, ctx, rd, insn_len);
+ if (ret)
+ return ret;
+ break;
+ }
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
break;
/* ST: *(size *)(dst + off) = imm */
@@ -936,7 +1149,7 @@ out_be:
}
emit_imm(RV_REG_T2, off, ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
break;
@@ -948,30 +1161,30 @@ out_be:
}
emit_imm(RV_REG_T2, off, ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
break;
case BPF_ST | BPF_MEM | BPF_W:
emit_imm(RV_REG_T1, imm, ctx);
if (is_12b_int(off)) {
- emit(rv_sw(rd, off, RV_REG_T1), ctx);
+ emit_sw(rd, off, RV_REG_T1, ctx);
break;
}
emit_imm(RV_REG_T2, off, ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
- emit(rv_sw(RV_REG_T2, 0, RV_REG_T1), ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+ emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
break;
case BPF_ST | BPF_MEM | BPF_DW:
emit_imm(RV_REG_T1, imm, ctx);
if (is_12b_int(off)) {
- emit(rv_sd(rd, off, RV_REG_T1), ctx);
+ emit_sd(rd, off, RV_REG_T1, ctx);
break;
}
emit_imm(RV_REG_T2, off, ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
- emit(rv_sd(RV_REG_T2, 0, RV_REG_T1), ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+ emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
break;
/* STX: *(size *)(dst + off) = src */
@@ -982,7 +1195,7 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
emit(rv_sb(RV_REG_T1, 0, rs), ctx);
break;
case BPF_STX | BPF_MEM | BPF_H:
@@ -992,47 +1205,33 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
emit(rv_sh(RV_REG_T1, 0, rs), ctx);
break;
case BPF_STX | BPF_MEM | BPF_W:
if (is_12b_int(off)) {
- emit(rv_sw(rd, off, rs), ctx);
+ emit_sw(rd, off, rs, ctx);
break;
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
- emit(rv_sw(RV_REG_T1, 0, rs), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit_sw(RV_REG_T1, 0, rs, ctx);
break;
case BPF_STX | BPF_MEM | BPF_DW:
if (is_12b_int(off)) {
- emit(rv_sd(rd, off, rs), ctx);
+ emit_sd(rd, off, rs, ctx);
break;
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
- emit(rv_sd(RV_REG_T1, 0, rs), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit_sd(RV_REG_T1, 0, rs, ctx);
break;
- /* STX XADD: lock *(u32 *)(dst + off) += src */
- case BPF_STX | BPF_XADD | BPF_W:
- /* STX XADD: lock *(u64 *)(dst + off) += src */
- case BPF_STX | BPF_XADD | BPF_DW:
- if (off) {
- if (is_12b_int(off)) {
- emit(rv_addi(RV_REG_T1, rd, off), ctx);
- } else {
- emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
- }
-
- rd = RV_REG_T1;
- }
-
- emit(BPF_SIZE(code) == BPF_W ?
- rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) :
- rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
+ emit_atomic(rd, rs, off, imm,
+ BPF_SIZE(code) == BPF_DW, ctx);
break;
default:
pr_err("bpf-jit: unknown opcode %02x\n", code);
@@ -1073,52 +1272,53 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
/* First instruction is always setting the tail-call-counter
* (TCC) register. This instruction is skipped for tail calls.
+ * Force using a 4-byte (non-compressed) instruction.
*/
emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);
- emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx);
+ emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx);
if (seen_reg(RV_REG_RA, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_RA), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx);
store_offset -= 8;
}
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_FP), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx);
store_offset -= 8;
if (seen_reg(RV_REG_S1, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S1), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S2, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S2), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S3, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S3), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S4, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S4), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S5, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S5), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S6, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S6), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
store_offset -= 8;
}
- emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx);
+ emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);
if (bpf_stack_adjust)
- emit(rv_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust), ctx);
+ emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx);
/* Program contains calls and tail calls, so RV_REG_TCC need
* to be saved across calls.
*/
if (seen_tail_call(ctx) && seen_call(ctx))
- emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx);
+ emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);
ctx->stack_size = stack_adjust;
}
@@ -1127,16 +1327,3 @@ void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
{
__build_epilogue(false, ctx);
}
-
-void *bpf_jit_alloc_exec(unsigned long size)
-{
- return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
- BPF_JIT_REGION_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
- return vfree(addr);
-}
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 709b94ece3ed..be743d700aa7 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -11,7 +11,7 @@
#include "bpf_jit.h"
/* Number of iterations to try until offsets converge. */
-#define NR_JIT_ITERATIONS 16
+#define NR_JIT_ITERATIONS 32
static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
{
@@ -41,12 +41,12 @@ bool bpf_jit_needs_zext(void)
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
+ unsigned int prog_size = 0, extable_size = 0;
bool tmp_blinded = false, extra_pass = false;
struct bpf_prog *tmp, *orig_prog = prog;
int pass = 0, prev_ninsns = 0, i;
struct rv_jit_data *jit_data;
struct rv_jit_context *ctx;
- unsigned int image_size = 0;
if (!prog->jit_requested)
return orig_prog;
@@ -73,7 +73,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (ctx->offset) {
extra_pass = true;
- image_size = sizeof(u32) * ctx->ninsns;
+ prog_size = sizeof(*ctx->insns) * ctx->ninsns;
goto skip_init_ctx;
}
@@ -102,10 +102,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (ctx->ninsns == prev_ninsns) {
if (jit_data->header)
break;
+ /* obtain the actual image size */
+ extable_size = prog->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+ prog_size = sizeof(*ctx->insns) * ctx->ninsns;
- image_size = sizeof(u32) * ctx->ninsns;
jit_data->header =
- bpf_jit_binary_alloc(image_size,
+ bpf_jit_binary_alloc(prog_size + extable_size,
&jit_data->image,
sizeof(u32),
bpf_fill_ill_insns);
@@ -114,7 +117,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_offset;
}
- ctx->insns = (u32 *)jit_data->image;
+ ctx->insns = (u16 *)jit_data->image;
/*
* Now, when the image is allocated, the image can
* potentially shrink more (auipc/jalr -> jal).
@@ -125,14 +128,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (i == NR_JIT_ITERATIONS) {
pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
- bpf_jit_binary_free(jit_data->header);
+ if (jit_data->header)
+ bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
}
+ if (extable_size)
+ prog->aux->extable = (void *)ctx->insns + prog_size;
+
skip_init_ctx:
pass++;
ctx->ninsns = 0;
+ ctx->nexentries = 0;
bpf_jit_build_prologue(ctx);
if (build_body(ctx, extra_pass, NULL)) {
@@ -143,15 +151,16 @@ skip_init_ctx:
bpf_jit_build_epilogue(ctx);
if (bpf_jit_enable > 1)
- bpf_jit_dump(prog->len, image_size, pass, ctx->insns);
+ bpf_jit_dump(prog->len, prog_size, pass, ctx->insns);
prog->bpf_func = (void *)ctx->insns;
prog->jited = 1;
- prog->jited_len = image_size;
+ prog->jited_len = prog_size;
bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);
if (!prog->is_func || extra_pass) {
+ bpf_jit_binary_lock_ro(jit_data->header);
out_offset:
kfree(ctx->offset);
kfree(jit_data);
@@ -164,3 +173,21 @@ out:
tmp : orig_prog);
return prog;
}
+
+u64 bpf_jit_alloc_exec_limit(void)
+{
+ return BPF_JIT_REGION_SIZE;
+}
+
+void *bpf_jit_alloc_exec(unsigned long size)
+{
+ return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
+ BPF_JIT_REGION_END, GFP_KERNEL,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+}
+
+void bpf_jit_free_exec(void *addr)
+{
+ return vfree(addr);
+}
diff --git a/arch/riscv/purgatory/.gitignore b/arch/riscv/purgatory/.gitignore
new file mode 100644
index 000000000000..38d7d1bda4d7
--- /dev/null
+++ b/arch/riscv/purgatory/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+purgatory.chk
+purgatory.ro
+kexec-purgatory.c
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
new file mode 100644
index 000000000000..d4df200f7edf
--- /dev/null
+++ b/arch/riscv/purgatory/Makefile
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0
+OBJECT_FILES_NON_STANDARD := y
+
+purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
+
+targets += $(purgatory-y)
+PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+
+$(obj)/string.o: $(srctree)/lib/string.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+$(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+$(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE
+ $(call if_changed_rule,as_o_S)
+
+$(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE
+ $(call if_changed_rule,as_o_S)
+
+$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+CFLAGS_string.o := -D__DISABLE_EXPORTS
+CFLAGS_ctype.o := -D__DISABLE_EXPORTS
+
+# When linking purgatory.ro with -r unresolved symbols are not checked,
+# also link a purgatory.chk binary without -r to check for unresolved symbols.
+PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib
+LDFLAGS_purgatory.ro := -r $(PURGATORY_LDFLAGS)
+LDFLAGS_purgatory.chk := $(PURGATORY_LDFLAGS)
+targets += purgatory.ro purgatory.chk
+
+# Sanitizer, etc. runtimes are unavailable and cannot be linked here.
+GCOV_PROFILE := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+# These are adjustments to the compiler flags used for objects that
+# make up the standalone purgatory.ro
+
+PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
+PURGATORY_CFLAGS := -mcmodel=medany -ffreestanding -fno-zero-initialized-in-bss
+PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
+PURGATORY_CFLAGS += -fno-stack-protector -g0
+
+# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
+# in turn leaves some undefined symbols like __fentry__ in purgatory and not
+# sure how to relocate those.
+ifdef CONFIG_FUNCTION_TRACER
+PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_FTRACE)
+endif
+
+ifdef CONFIG_STACKPROTECTOR
+PURGATORY_CFLAGS_REMOVE += -fstack-protector
+endif
+
+ifdef CONFIG_STACKPROTECTOR_STRONG
+PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong
+endif
+
+CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_purgatory.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_sha256.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_string.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
+
+AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
+AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
+AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
+
+$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+
+$(obj)/purgatory.chk: $(obj)/purgatory.ro FORCE
+ $(call if_changed,ld)
+
+targets += kexec-purgatory.c
+
+quiet_cmd_bin2c = BIN2C $@
+ cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@
+
+$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro $(obj)/purgatory.chk FORCE
+ $(call if_changed,bin2c)
+
+obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o
diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S
new file mode 100644
index 000000000000..0194f4554130
--- /dev/null
+++ b/arch/riscv/purgatory/entry.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * purgatory: Runs between two kernels
+ *
+ * Copyright (C) 2022 Huawei Technologies Co, Ltd.
+ *
+ * Author: Li Zhengyu (lizhengyu3@huawei.com)
+ *
+ */
+
+.macro size, sym:req
+ .size \sym, . - \sym
+.endm
+
+.text
+
+.globl purgatory_start
+purgatory_start:
+
+ lla sp, .Lstack
+ mv s0, a0 /* The hartid of the current hart */
+ mv s1, a1 /* Phys address of the FDT image */
+
+ jal purgatory
+
+ /* Start new image. */
+ mv a0, s0
+ mv a1, s1
+ ld a2, riscv_kernel_entry
+ jr a2
+
+size purgatory_start
+
+.align 4
+ .rept 256
+ .quad 0
+ .endr
+.Lstack:
+
+.data
+
+.globl riscv_kernel_entry
+riscv_kernel_entry:
+ .quad 0
+size riscv_kernel_entry
+
+.end
diff --git a/arch/riscv/purgatory/purgatory.c b/arch/riscv/purgatory/purgatory.c
new file mode 100644
index 000000000000..80596ab5fb62
--- /dev/null
+++ b/arch/riscv/purgatory/purgatory.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * purgatory: Runs between two kernels
+ *
+ * Copyright (C) 2022 Huawei Technologies Co, Ltd.
+ *
+ * Author: Li Zhengyu (lizhengyu3@huawei.com)
+ *
+ */
+
+#include <linux/purgatory.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <asm/string.h>
+
+u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(".kexec-purgatory");
+
+struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(".kexec-purgatory");
+
+static int verify_sha256_digest(void)
+{
+ struct kexec_sha_region *ptr, *end;
+ struct sha256_state ss;
+ u8 digest[SHA256_DIGEST_SIZE];
+
+ sha256_init(&ss);
+ end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+ for (ptr = purgatory_sha_regions; ptr < end; ptr++)
+ sha256_update(&ss, (uint8_t *)(ptr->start), ptr->len);
+ sha256_final(&ss, digest);
+ if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)) != 0)
+ return 1;
+ return 0;
+}
+
+/* workaround for a warning with -Wmissing-prototypes */
+void purgatory(void);
+
+void purgatory(void)
+{
+ if (verify_sha256_digest())
+ for (;;)
+ /* loop forever */
+ ;
+}