aboutsummaryrefslogtreecommitdiff
path: root/kernel/bpf/verifier.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r--kernel/bpf/verifier.c1132
1 files changed, 861 insertions, 271 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 17270b8404f1..1dda9d81f12c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -228,6 +228,12 @@ static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
(poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
+static bool bpf_pseudo_call(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == BPF_PSEUDO_CALL;
+}
+
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
@@ -1073,6 +1079,51 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
__mark_reg_known_zero(regs + regno);
}
+static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
+{
+ switch (reg->type) {
+ case PTR_TO_MAP_VALUE_OR_NULL: {
+ const struct bpf_map *map = reg->map_ptr;
+
+ if (map->inner_map_meta) {
+ reg->type = CONST_PTR_TO_MAP;
+ reg->map_ptr = map->inner_map_meta;
+ } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
+ reg->type = PTR_TO_XDP_SOCK;
+ } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
+ map->map_type == BPF_MAP_TYPE_SOCKHASH) {
+ reg->type = PTR_TO_SOCKET;
+ } else {
+ reg->type = PTR_TO_MAP_VALUE;
+ }
+ break;
+ }
+ case PTR_TO_SOCKET_OR_NULL:
+ reg->type = PTR_TO_SOCKET;
+ break;
+ case PTR_TO_SOCK_COMMON_OR_NULL:
+ reg->type = PTR_TO_SOCK_COMMON;
+ break;
+ case PTR_TO_TCP_SOCK_OR_NULL:
+ reg->type = PTR_TO_TCP_SOCK;
+ break;
+ case PTR_TO_BTF_ID_OR_NULL:
+ reg->type = PTR_TO_BTF_ID;
+ break;
+ case PTR_TO_MEM_OR_NULL:
+ reg->type = PTR_TO_MEM;
+ break;
+ case PTR_TO_RDONLY_BUF_OR_NULL:
+ reg->type = PTR_TO_RDONLY_BUF;
+ break;
+ case PTR_TO_RDWR_BUF_OR_NULL:
+ reg->type = PTR_TO_RDWR_BUF;
+ break;
+ default:
+ WARN_ON("unknown nullable register type");
+ }
+}
+
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
return type_is_pkt_pointer(reg->type);
@@ -1486,9 +1537,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
/* determine subprog starts. The end is one before the next starts */
for (i = 0; i < insn_cnt; i++) {
- if (insn[i].code != (BPF_JMP | BPF_CALL))
- continue;
- if (insn[i].src_reg != BPF_PSEUDO_CALL)
+ if (!bpf_pseudo_call(insn + i))
continue;
if (!env->bpf_capable) {
verbose(env,
@@ -2217,6 +2266,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_RDWR_BUF:
case PTR_TO_RDWR_BUF_OR_NULL:
case PTR_TO_PERCPU_BTF_ID:
+ case PTR_TO_MEM:
+ case PTR_TO_MEM_OR_NULL:
return true;
default:
return false;
@@ -2269,12 +2320,14 @@ static void save_register_state(struct bpf_func_state *state,
state->stack[spi].slot_type[i] = STACK_SPILL;
}
-/* check_stack_read/write functions track spill/fill of registers,
+/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
-static int check_stack_write(struct bpf_verifier_env *env,
- struct bpf_func_state *state, /* func where register points to */
- int off, int size, int value_regno, int insn_idx)
+static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ /* stack frame we're writing to */
+ struct bpf_func_state *state,
+ int off, int size, int value_regno,
+ int insn_idx)
{
struct bpf_func_state *cur; /* state of the current function */
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
@@ -2400,9 +2453,175 @@ static int check_stack_write(struct bpf_verifier_env *env,
return 0;
}
-static int check_stack_read(struct bpf_verifier_env *env,
- struct bpf_func_state *reg_state /* func where register points to */,
- int off, int size, int value_regno)
+/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
+ * known to contain a variable offset.
+ * This function checks whether the write is permitted and conservatively
+ * tracks the effects of the write, considering that each stack slot in the
+ * dynamic range is potentially written to.
+ *
+ * 'off' includes 'regno->off'.
+ * 'value_regno' can be -1, meaning that an unknown value is being written to
+ * the stack.
+ *
+ * Spilled pointers in range are not marked as written because we don't know
+ * what's going to be actually written. This means that read propagation for
+ * future reads cannot be terminated by this write.
+ *
+ * For privileged programs, uninitialized stack slots are considered
+ * initialized by this write (even though we don't know exactly what offsets
+ * are going to be written to). The idea is that we don't want the verifier to
+ * reject future reads that access slots written to through variable offsets.
+ */
+static int check_stack_write_var_off(struct bpf_verifier_env *env,
+ /* func where register points to */
+ struct bpf_func_state *state,
+ int ptr_regno, int off, int size,
+ int value_regno, int insn_idx)
+{
+ struct bpf_func_state *cur; /* state of the current function */
+ int min_off, max_off;
+ int i, err;
+ struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
+ bool writing_zero = false;
+ /* set if the fact that we're writing a zero is used to let any
+ * stack slots remain STACK_ZERO
+ */
+ bool zero_used = false;
+
+ cur = env->cur_state->frame[env->cur_state->curframe];
+ ptr_reg = &cur->regs[ptr_regno];
+ min_off = ptr_reg->smin_value + off;
+ max_off = ptr_reg->smax_value + off + size;
+ if (value_regno >= 0)
+ value_reg = &cur->regs[value_regno];
+ if (value_reg && register_is_null(value_reg))
+ writing_zero = true;
+
+ err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE),
+ state->acquired_refs, true);
+ if (err)
+ return err;
+
+
+ /* Variable offset writes destroy any spilled pointers in range. */
+ for (i = min_off; i < max_off; i++) {
+ u8 new_type, *stype;
+ int slot, spi;
+
+ slot = -i - 1;
+ spi = slot / BPF_REG_SIZE;
+ stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
+
+ if (!env->allow_ptr_leaks
+ && *stype != NOT_INIT
+ && *stype != SCALAR_VALUE) {
+ /* Reject the write if there's are spilled pointers in
+ * range. If we didn't reject here, the ptr status
+ * would be erased below (even though not all slots are
+ * actually overwritten), possibly opening the door to
+ * leaks.
+ */
+ verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
+ insn_idx, i);
+ return -EINVAL;
+ }
+
+ /* Erase all spilled pointers. */
+ state->stack[spi].spilled_ptr.type = NOT_INIT;
+
+ /* Update the slot type. */
+ new_type = STACK_MISC;
+ if (writing_zero && *stype == STACK_ZERO) {
+ new_type = STACK_ZERO;
+ zero_used = true;
+ }
+ /* If the slot is STACK_INVALID, we check whether it's OK to
+ * pretend that it will be initialized by this write. The slot
+ * might not actually be written to, and so if we mark it as
+ * initialized future reads might leak uninitialized memory.
+ * For privileged programs, we will accept such reads to slots
+ * that may or may not be written because, if we're reject
+ * them, the error would be too confusing.
+ */
+ if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
+ verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
+ insn_idx, i);
+ return -EINVAL;
+ }
+ *stype = new_type;
+ }
+ if (zero_used) {
+ /* backtracking doesn't work for STACK_ZERO yet. */
+ err = mark_chain_precision(env, value_regno);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+/* When register 'dst_regno' is assigned some values from stack[min_off,
+ * max_off), we set the register's type according to the types of the
+ * respective stack slots. If all the stack values are known to be zeros, then
+ * so is the destination reg. Otherwise, the register is considered to be
+ * SCALAR. This function does not deal with register filling; the caller must
+ * ensure that all spilled registers in the stack range have been marked as
+ * read.
+ */
+static void mark_reg_stack_read(struct bpf_verifier_env *env,
+ /* func where src register points to */
+ struct bpf_func_state *ptr_state,
+ int min_off, int max_off, int dst_regno)
+{
+ struct bpf_verifier_state *vstate = env->cur_state;
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ int i, slot, spi;
+ u8 *stype;
+ int zeros = 0;
+
+ for (i = min_off; i < max_off; i++) {
+ slot = -i - 1;
+ spi = slot / BPF_REG_SIZE;
+ stype = ptr_state->stack[spi].slot_type;
+ if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
+ break;
+ zeros++;
+ }
+ if (zeros == max_off - min_off) {
+ /* any access_size read into register is zero extended,
+ * so the whole register == const_zero
+ */
+ __mark_reg_const_zero(&state->regs[dst_regno]);
+ /* backtracking doesn't support STACK_ZERO yet,
+ * so mark it precise here, so that later
+ * backtracking can stop here.
+ * Backtracking may not need this if this register
+ * doesn't participate in pointer adjustment.
+ * Forward propagation of precise flag is not
+ * necessary either. This mark is only to stop
+ * backtracking. Any register that contributed
+ * to const 0 was marked precise before spill.
+ */
+ state->regs[dst_regno].precise = true;
+ } else {
+ /* have read misc data from the stack */
+ mark_reg_unknown(env, state->regs, dst_regno);
+ }
+ state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
+}
+
+/* Read the stack at 'off' and put the results into the register indicated by
+ * 'dst_regno'. It handles reg filling if the addressed stack slot is a
+ * spilled reg.
+ *
+ * 'dst_regno' can be -1, meaning that the read value is not going to a
+ * register.
+ *
+ * The access is assumed to be within the current stack bounds.
+ */
+static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
+ /* func where src register points to */
+ struct bpf_func_state *reg_state,
+ int off, int size, int dst_regno)
{
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
@@ -2410,11 +2629,6 @@ static int check_stack_read(struct bpf_verifier_env *env,
struct bpf_reg_state *reg;
u8 *stype;
- if (reg_state->allocated_stack <= slot) {
- verbose(env, "invalid read from stack off %d+0 size %d\n",
- off, size);
- return -EACCES;
- }
stype = reg_state->stack[spi].slot_type;
reg = &reg_state->stack[spi].spilled_ptr;
@@ -2425,9 +2639,9 @@ static int check_stack_read(struct bpf_verifier_env *env,
verbose(env, "invalid size of register fill\n");
return -EACCES;
}
- if (value_regno >= 0) {
- mark_reg_unknown(env, state->regs, value_regno);
- state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+ if (dst_regno >= 0) {
+ mark_reg_unknown(env, state->regs, dst_regno);
+ state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
return 0;
@@ -2439,16 +2653,16 @@ static int check_stack_read(struct bpf_verifier_env *env,
}
}
- if (value_regno >= 0) {
+ if (dst_regno >= 0) {
/* restore register state from stack */
- state->regs[value_regno] = *reg;
+ state->regs[dst_regno] = *reg;
/* mark reg as written since spilled pointer state likely
* has its liveness marks cleared by is_state_visited()
* which resets stack/reg liveness for state transitions
*/
- state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+ state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
- /* If value_regno==-1, the caller is asking us whether
+ /* If dst_regno==-1, the caller is asking us whether
* it is acceptable to use this value as a SCALAR_VALUE
* (e.g. for XADD).
* We must not allow unprivileged callers to do that
@@ -2460,70 +2674,167 @@ static int check_stack_read(struct bpf_verifier_env *env,
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
- int zeros = 0;
+ u8 type;
for (i = 0; i < size; i++) {
- if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
+ type = stype[(slot - i) % BPF_REG_SIZE];
+ if (type == STACK_MISC)
continue;
- if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
- zeros++;
+ if (type == STACK_ZERO)
continue;
- }
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
- if (value_regno >= 0) {
- if (zeros == size) {
- /* any size read into register is zero extended,
- * so the whole register == const_zero
- */
- __mark_reg_const_zero(&state->regs[value_regno]);
- /* backtracking doesn't support STACK_ZERO yet,
- * so mark it precise here, so that later
- * backtracking can stop here.
- * Backtracking may not need this if this register
- * doesn't participate in pointer adjustment.
- * Forward propagation of precise flag is not
- * necessary either. This mark is only to stop
- * backtracking. Any register that contributed
- * to const 0 was marked precise before spill.
- */
- state->regs[value_regno].precise = true;
- } else {
- /* have read misc data from the stack */
- mark_reg_unknown(env, state->regs, value_regno);
- }
- state->regs[value_regno].live |= REG_LIVE_WRITTEN;
- }
+ if (dst_regno >= 0)
+ mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
}
return 0;
}
-static int check_stack_access(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg,
- int off, int size)
+enum stack_access_src {
+ ACCESS_DIRECT = 1, /* the access is performed by an instruction */
+ ACCESS_HELPER = 2, /* the access is performed by a helper */
+};
+
+static int check_stack_range_initialized(struct bpf_verifier_env *env,
+ int regno, int off, int access_size,
+ bool zero_size_allowed,
+ enum stack_access_src type,
+ struct bpf_call_arg_meta *meta);
+
+static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
- /* Stack accesses must be at a fixed offset, so that we
- * can determine what type of data were returned. See
- * check_stack_read().
+ return cur_regs(env) + regno;
+}
+
+/* Read the stack at 'ptr_regno + off' and put the result into the register
+ * 'dst_regno'.
+ * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
+ * but not its variable offset.
+ * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
+ *
+ * As opposed to check_stack_read_fixed_off, this function doesn't deal with
+ * filling registers (i.e. reads of spilled register cannot be detected when
+ * the offset is not fixed). We conservatively mark 'dst_regno' as containing
+ * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
+ * offset; for a fixed offset check_stack_read_fixed_off should be used
+ * instead.
+ */
+static int check_stack_read_var_off(struct bpf_verifier_env *env,
+ int ptr_regno, int off, int size, int dst_regno)
+{
+ /* The state of the source register. */
+ struct bpf_reg_state *reg = reg_state(env, ptr_regno);
+ struct bpf_func_state *ptr_state = func(env, reg);
+ int err;
+ int min_off, max_off;
+
+ /* Note that we pass a NULL meta, so raw access will not be permitted.
*/
- if (!tnum_is_const(reg->var_off)) {
+ err = check_stack_range_initialized(env, ptr_regno, off, size,
+ false, ACCESS_DIRECT, NULL);
+ if (err)
+ return err;
+
+ min_off = reg->smin_value + off;
+ max_off = reg->smax_value + off;
+ mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
+ return 0;
+}
+
+/* check_stack_read dispatches to check_stack_read_fixed_off or
+ * check_stack_read_var_off.
+ *
+ * The caller must ensure that the offset falls within the allocated stack
+ * bounds.
+ *
+ * 'dst_regno' is a register which will receive the value from the stack. It
+ * can be -1, meaning that the read value is not going to a register.
+ */
+static int check_stack_read(struct bpf_verifier_env *env,
+ int ptr_regno, int off, int size,
+ int dst_regno)
+{
+ struct bpf_reg_state *reg = reg_state(env, ptr_regno);
+ struct bpf_func_state *state = func(env, reg);
+ int err;
+ /* Some accesses are only permitted with a static offset. */
+ bool var_off = !tnum_is_const(reg->var_off);
+
+ /* The offset is required to be static when reads don't go to a
+ * register, in order to not leak pointers (see
+ * check_stack_read_fixed_off).
+ */
+ if (dst_regno < 0 && var_off) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
+ verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
tn_buf, off, size);
return -EACCES;
}
+ /* Variable offset is prohibited for unprivileged mode for simplicity
+ * since it requires corresponding support in Spectre masking for stack
+ * ALU. See also retrieve_ptr_limit().
+ */
+ if (!env->bypass_spec_v1 && var_off) {
+ char tn_buf[48];
- if (off >= 0 || off < -MAX_BPF_STACK) {
- verbose(env, "invalid stack off=%d size=%d\n", off, size);
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
+ ptr_regno, tn_buf);
return -EACCES;
}
- return 0;
+ if (!var_off) {
+ off += reg->var_off.value;
+ err = check_stack_read_fixed_off(env, state, off, size,
+ dst_regno);
+ } else {
+ /* Variable offset stack reads need more conservative handling
+ * than fixed offset ones. Note that dst_regno >= 0 on this
+ * branch.
+ */
+ err = check_stack_read_var_off(env, ptr_regno, off, size,
+ dst_regno);
+ }
+ return err;
+}
+
+
+/* check_stack_write dispatches to check_stack_write_fixed_off or
+ * check_stack_write_var_off.
+ *
+ * 'ptr_regno' is the register used as a pointer into the stack.
+ * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
+ * 'value_regno' is the register whose value we're writing to the stack. It can
+ * be -1, meaning that we're not writing from a register.
+ *
+ * The caller must ensure that the offset falls within the maximum stack size.
+ */
+static int check_stack_write(struct bpf_verifier_env *env,
+ int ptr_regno, int off, int size,
+ int value_regno, int insn_idx)
+{
+ struct bpf_reg_state *reg = reg_state(env, ptr_regno);
+ struct bpf_func_state *state = func(env, reg);
+ int err;
+
+ if (tnum_is_const(reg->var_off)) {
+ off += reg->var_off.value;
+ err = check_stack_write_fixed_off(env, state, off, size,
+ value_regno, insn_idx);
+ } else {
+ /* Variable offset stack reads need more conservative handling
+ * than fixed offset ones.
+ */
+ err = check_stack_write_var_off(env, state,
+ ptr_regno, off, size,
+ value_regno, insn_idx);
+ }
+ return err;
}
static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
@@ -2856,11 +3167,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
return -EACCES;
}
-static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
-{
- return cur_regs(env) + regno;
-}
-
static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
{
return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
@@ -2979,8 +3285,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
break;
case PTR_TO_STACK:
pointer_desc = "stack ";
- /* The stack spill tracking logic in check_stack_write()
- * and check_stack_read() relies on stack accesses being
+ /* The stack spill tracking logic in check_stack_write_fixed_off()
+ * and check_stack_read_fixed_off() relies on stack accesses being
* aligned.
*/
strict = true;
@@ -3072,9 +3378,7 @@ process_func:
continue_func:
subprog_end = subprog[idx + 1].start;
for (; i < subprog_end; i++) {
- if (insn[i].code != (BPF_JMP | BPF_CALL))
- continue;
- if (insn[i].src_reg != BPF_PSEUDO_CALL)
+ if (!bpf_pseudo_call(insn + i))
continue;
/* remember insn and function to return to */
ret_insn[frame] = i + 1;
@@ -3398,6 +3702,91 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
return 0;
}
+/* Check that the stack access at the given offset is within bounds. The
+ * maximum valid offset is -1.
+ *
+ * The minimum valid offset is -MAX_BPF_STACK for writes, and
+ * -state->allocated_stack for reads.
+ */
+static int check_stack_slot_within_bounds(int off,
+ struct bpf_func_state *state,
+ enum bpf_access_type t)
+{
+ int min_valid_off;
+
+ if (t == BPF_WRITE)
+ min_valid_off = -MAX_BPF_STACK;
+ else
+ min_valid_off = -state->allocated_stack;
+
+ if (off < min_valid_off || off > -1)
+ return -EACCES;
+ return 0;
+}
+
+/* Check that the stack access at 'regno + off' falls within the maximum stack
+ * bounds.
+ *
+ * 'off' includes `regno->offset`, but not its dynamic part (if any).
+ */
+static int check_stack_access_within_bounds(
+ struct bpf_verifier_env *env,
+ int regno, int off, int access_size,
+ enum stack_access_src src, enum bpf_access_type type)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *reg = regs + regno;
+ struct bpf_func_state *state = func(env, reg);
+ int min_off, max_off;
+ int err;
+ char *err_extra;
+
+ if (src == ACCESS_HELPER)
+ /* We don't know if helpers are reading or writing (or both). */
+ err_extra = " indirect access to";
+ else if (type == BPF_READ)
+ err_extra = " read from";
+ else
+ err_extra = " write to";
+
+ if (tnum_is_const(reg->var_off)) {
+ min_off = reg->var_off.value + off;
+ if (access_size > 0)
+ max_off = min_off + access_size - 1;
+ else
+ max_off = min_off;
+ } else {
+ if (reg->smax_value >= BPF_MAX_VAR_OFF ||
+ reg->smin_value <= -BPF_MAX_VAR_OFF) {
+ verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
+ err_extra, regno);
+ return -EACCES;
+ }
+ min_off = reg->smin_value + off;
+ if (access_size > 0)
+ max_off = reg->smax_value + off + access_size - 1;
+ else
+ max_off = min_off;
+ }
+
+ err = check_stack_slot_within_bounds(min_off, state, type);
+ if (!err)
+ err = check_stack_slot_within_bounds(max_off, state, type);
+
+ if (err) {
+ if (tnum_is_const(reg->var_off)) {
+ verbose(env, "invalid%s stack R%d off=%d size=%d\n",
+ err_extra, regno, off, access_size);
+ } else {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
+ err_extra, regno, tn_buf, access_size);
+ }
+ }
+ return err;
+}
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
@@ -3513,8 +3902,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
}
} else if (reg->type == PTR_TO_STACK) {
- off += reg->var_off.value;
- err = check_stack_access(env, reg, off, size);
+ /* Basic bounds checks. */
+ err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
if (err)
return err;
@@ -3523,12 +3912,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err)
return err;
- if (t == BPF_WRITE)
- err = check_stack_write(env, state, off, size,
- value_regno, insn_idx);
- else
- err = check_stack_read(env, state, off, size,
+ if (t == BPF_READ)
+ err = check_stack_read(env, regno, off, size,
value_regno);
+ else
+ err = check_stack_write(env, regno, off, size,
+ value_regno, insn_idx);
} else if (reg_is_pkt_pointer(reg)) {
if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
verbose(env, "cannot write into packet\n");
@@ -3604,13 +3993,30 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return err;
}
-static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
+static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
{
+ int load_reg;
int err;
- if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
- insn->imm != 0) {
- verbose(env, "BPF_XADD uses reserved fields\n");
+ switch (insn->imm) {
+ case BPF_ADD:
+ case BPF_ADD | BPF_FETCH:
+ case BPF_AND:
+ case BPF_AND | BPF_FETCH:
+ case BPF_OR:
+ case BPF_OR | BPF_FETCH:
+ case BPF_XOR:
+ case BPF_XOR | BPF_FETCH:
+ case BPF_XCHG:
+ case BPF_CMPXCHG:
+ break;
+ default:
+ verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
+ return -EINVAL;
+ }
+
+ if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
+ verbose(env, "invalid atomic operand size\n");
return -EINVAL;
}
@@ -3624,6 +4030,13 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
if (err)
return err;
+ if (insn->imm == BPF_CMPXCHG) {
+ /* Check comparison of R0 with memory location */
+ err = check_reg_arg(env, BPF_REG_0, SRC_OP);
+ if (err)
+ return err;
+ }
+
if (is_pointer_value(env, insn->src_reg)) {
verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
return -EACCES;
@@ -3633,66 +4046,91 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
is_pkt_reg(env, insn->dst_reg) ||
is_flow_key_reg(env, insn->dst_reg) ||
is_sk_reg(env, insn->dst_reg)) {
- verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
+ verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str[reg_state(env, insn->dst_reg)->type]);
return -EACCES;
}
- /* check whether atomic_add can read the memory */
+ if (insn->imm & BPF_FETCH) {
+ if (insn->imm == BPF_CMPXCHG)
+ load_reg = BPF_REG_0;
+ else
+ load_reg = insn->src_reg;
+
+ /* check and record load of old value */
+ err = check_reg_arg(env, load_reg, DST_OP);
+ if (err)
+ return err;
+ } else {
+ /* This instruction accesses a memory location but doesn't
+ * actually load it into a register.
+ */
+ load_reg = -1;
+ }
+
+ /* check whether we can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_READ, -1, true);
+ BPF_SIZE(insn->code), BPF_READ, load_reg, true);
if (err)
return err;
- /* check whether atomic_add can write into the same memory */
- return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_WRITE, -1, true);
-}
-
-static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
- int off, int access_size,
- bool zero_size_allowed)
-{
- struct bpf_reg_state *reg = reg_state(env, regno);
-
- if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
- access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
- if (tnum_is_const(reg->var_off)) {
- verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
- regno, off, access_size);
- } else {
- char tn_buf[48];
+ /* check whether we can write into the same memory */
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_WRITE, -1, true);
+ if (err)
+ return err;
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
- regno, tn_buf, access_size);
- }
- return -EACCES;
- }
return 0;
}
-/* when register 'regno' is passed into function that will read 'access_size'
- * bytes from that pointer, make sure that it's within stack boundary
- * and all elements of stack are initialized.
- * Unlike most pointer bounds-checking functions, this one doesn't take an
- * 'off' argument, so it has to add in reg->off itself.
+/* When register 'regno' is used to read the stack (either directly or through
+ * a helper function) make sure that it's within stack boundary and, depending
+ * on the access type, that all elements of the stack are initialized.
+ *
+ * 'off' includes 'regno->off', but not its dynamic part (if any).
+ *
+ * All registers that have been spilled on the stack in the slots within the
+ * read offsets are marked as read.
*/
-static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
- int access_size, bool zero_size_allowed,
- struct bpf_call_arg_meta *meta)
+static int check_stack_range_initialized(
+ struct bpf_verifier_env *env, int regno, int off,
+ int access_size, bool zero_size_allowed,
+ enum stack_access_src type, struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_func_state *state = func(env, reg);
int err, min_off, max_off, i, j, slot, spi;
+ char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
+ enum bpf_access_type bounds_check_type;
+ /* Some accesses can write anything into the stack, others are
+ * read-only.
+ */
+ bool clobber = false;
+
+ if (access_size == 0 && !zero_size_allowed) {
+ verbose(env, "invalid zero-sized read\n");
+ return -EACCES;
+ }
+
+ if (type == ACCESS_HELPER) {
+ /* The bounds checks for writes are more permissive than for
+ * reads. However, if raw_mode is not set, we'll do extra
+ * checks below.
+ */
+ bounds_check_type = BPF_WRITE;
+ clobber = true;
+ } else {
+ bounds_check_type = BPF_READ;
+ }
+ err = check_stack_access_within_bounds(env, regno, off, access_size,
+ type, bounds_check_type);
+ if (err)
+ return err;
+
if (tnum_is_const(reg->var_off)) {
- min_off = max_off = reg->var_off.value + reg->off;
- err = __check_stack_boundary(env, regno, min_off, access_size,
- zero_size_allowed);
- if (err)
- return err;
+ min_off = max_off = reg->var_off.value + off;
} else {
/* Variable offset is prohibited for unprivileged mode for
* simplicity since it requires corresponding support in
@@ -3703,8 +4141,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
- regno, tn_buf);
+ verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
+ regno, err_extra, tn_buf);
return -EACCES;
}
/* Only initialized buffer on stack is allowed to be accessed
@@ -3716,28 +4154,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
if (meta && meta->raw_mode)
meta = NULL;
- if (reg->smax_value >= BPF_MAX_VAR_OFF ||
- reg->smax_value <= -BPF_MAX_VAR_OFF) {
- verbose(env, "R%d unbounded indirect variable offset stack access\n",
- regno);
- return -EACCES;
- }
- min_off = reg->smin_value + reg->off;
- max_off = reg->smax_value + reg->off;
- err = __check_stack_boundary(env, regno, min_off, access_size,
- zero_size_allowed);
- if (err) {
- verbose(env, "R%d min value is outside of stack bound\n",
- regno);
- return err;
- }
- err = __check_stack_boundary(env, regno, max_off, access_size,
- zero_size_allowed);
- if (err) {
- verbose(env, "R%d max value is outside of stack bound\n",
- regno);
- return err;
- }
+ min_off = reg->smin_value + off;
+ max_off = reg->smax_value + off;
}
if (meta && meta->raw_mode) {
@@ -3757,8 +4175,10 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
if (*stype == STACK_MISC)
goto mark;
if (*stype == STACK_ZERO) {
- /* helper can write anything into the stack */
- *stype = STACK_MISC;
+ if (clobber) {
+ /* helper can write anything into the stack */
+ *stype = STACK_MISC;
+ }
goto mark;
}
@@ -3769,22 +4189,24 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
if (state->stack[spi].slot_type[0] == STACK_SPILL &&
(state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
env->allow_ptr_leaks)) {
- __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
- for (j = 0; j < BPF_REG_SIZE; j++)
- state->stack[spi].slot_type[j] = STACK_MISC;
+ if (clobber) {
+ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ state->stack[spi].slot_type[j] = STACK_MISC;
+ }
goto mark;
}
err:
if (tnum_is_const(reg->var_off)) {
- verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
- min_off, i - min_off, access_size);
+ verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
+ err_extra, regno, min_off, i - min_off, access_size);
} else {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
- tn_buf, i - min_off, access_size);
+ verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
+ err_extra, regno, tn_buf, i - min_off, access_size);
}
return -EACCES;
mark:
@@ -3833,8 +4255,10 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
"rdwr",
&env->prog->aux->max_rdwr_access);
case PTR_TO_STACK:
- return check_stack_boundary(env, regno, access_size,
- zero_size_allowed, meta);
+ return check_stack_range_initialized(
+ env,
+ regno, reg->off, access_size,
+ zero_size_allowed, ACCESS_HELPER, meta);
default: /* scalar_value or invalid ptr */
/* Allow zero-byte read from NULL, regardless of pointer type */
if (zero_size_allowed && access_size == 0 &&
@@ -3848,6 +4272,29 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
}
}
+int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno, u32 mem_size)
+{
+ if (register_is_null(reg))
+ return 0;
+
+ if (reg_type_may_be_null(reg->type)) {
+ /* Assuming that the register contains a value check if the memory
+ * access is safe. Temporarily save and restore the register's state as
+ * the conversion shouldn't be visible to a caller.
+ */
+ const struct bpf_reg_state saved_reg = *reg;
+ int rv;
+
+ mark_ptr_not_null_reg(reg);
+ rv = check_helper_mem_access(env, regno, mem_size, true, NULL);
+ *reg = saved_reg;
+ return rv;
+ }
+
+ return check_helper_mem_access(env, regno, mem_size, true, NULL);
+}
+
/* Implementation details:
* bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
* Two bpf_map_lookups (even with the same key) will have different reg->id.
@@ -4319,7 +4766,7 @@ skip_type_check:
err = mark_chain_precision(env, regno);
} else if (arg_type_is_alloc_size(arg_type)) {
if (!tnum_is_const(reg->var_off)) {
- verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
+ verbose(env, "R%d is not a known constant'\n",
regno);
return -EACCES;
}
@@ -4832,8 +5279,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
subprog);
clear_caller_saved_regs(env, caller->regs);
- /* All global functions return SCALAR_VALUE */
+ /* All global functions return a 64-bit SCALAR_VALUE */
mark_reg_unknown(env, caller->regs, BPF_REG_0);
+ caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
/* continue with next insn after call */
return 0;
@@ -5311,7 +5759,7 @@ static bool signed_add_overflows(s64 a, s64 b)
return res < a;
}
-static bool signed_add32_overflows(s64 a, s64 b)
+static bool signed_add32_overflows(s32 a, s32 b)
{
/* Do the add in u32, where overflow is well-defined */
s32 res = (s32)((u32)a + (u32)b);
@@ -5321,7 +5769,7 @@ static bool signed_add32_overflows(s64 a, s64 b)
return res < a;
}
-static bool signed_sub_overflows(s32 a, s32 b)
+static bool signed_sub_overflows(s64 a, s64 b)
{
/* Do the sub in u64, where overflow is well-defined */
s64 res = (s64)((u64)a - (u64)b);
@@ -5333,7 +5781,7 @@ static bool signed_sub_overflows(s32 a, s32 b)
static bool signed_sub32_overflows(s32 a, s32 b)
{
- /* Do the sub in u64, where overflow is well-defined */
+ /* Do the sub in u32, where overflow is well-defined */
s32 res = (s32)((u32)a - (u32)b);
if (b < 0)
@@ -5498,6 +5946,41 @@ do_sim:
return !ret ? -EFAULT : 0;
}
+/* check that stack access falls within stack limits and that 'reg' doesn't
+ * have a variable offset.
+ *
+ * Variable offset is prohibited for unprivileged mode for simplicity since it
+ * requires corresponding support in Spectre masking for stack ALU. See also
+ * retrieve_ptr_limit().
+ *
+ *
+ * 'off' includes 'reg->off'.
+ */
+static int check_stack_access_for_ptr_arithmetic(
+ struct bpf_verifier_env *env,
+ int regno,
+ const struct bpf_reg_state *reg,
+ int off)
+{
+ if (!tnum_is_const(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
+ regno, tn_buf, off);
+ return -EACCES;
+ }
+
+ if (off >= 0 || off < -MAX_BPF_STACK) {
+ verbose(env, "R%d stack pointer arithmetic goes out of range, "
+ "prohibited for !root; off=%d\n", regno, off);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+
/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
* Caller should also handle BPF_MOV case separately.
* If we return -EACCES, caller may want to try again treating pointer as a
@@ -5741,10 +6224,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
"prohibited for !root\n", dst);
return -EACCES;
} else if (dst_reg->type == PTR_TO_STACK &&
- check_stack_access(env, dst_reg, dst_reg->off +
- dst_reg->var_off.value, 1)) {
- verbose(env, "R%d stack pointer arithmetic goes out of range, "
- "prohibited for !root\n", dst);
+ check_stack_access_for_ptr_arithmetic(
+ env, dst, dst_reg, dst_reg->off +
+ dst_reg->var_off.value)) {
return -EACCES;
}
}
@@ -6223,7 +6705,7 @@ static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
* 3) the signed bounds cross zero, so they tell us nothing
* about the result
* If the value in dst_reg is known nonnegative, then again the
- * unsigned bounts capture the signed bounds.
+ * unsigned bounds capture the signed bounds.
* Thus, in all cases it suffices to blow away our signed bounds
* and rely on inferring new ones from the unsigned bounds and
* var_off of the result.
@@ -6254,7 +6736,7 @@ static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
* 3) the signed bounds cross zero, so they tell us nothing
* about the result
* If the value in dst_reg is known nonnegative, then again the
- * unsigned bounts capture the signed bounds.
+ * unsigned bounds capture the signed bounds.
* Thus, in all cases it suffices to blow away our signed bounds
* and rely on inferring new ones from the unsigned bounds and
* var_off of the result.
@@ -6875,7 +7357,7 @@ static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
case BPF_JSGT:
if (reg->s32_min_value > sval)
return 1;
- else if (reg->s32_max_value < sval)
+ else if (reg->s32_max_value <= sval)
return 0;
break;
case BPF_JLT:
@@ -6948,7 +7430,7 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
case BPF_JSGT:
if (reg->smin_value > sval)
return 1;
- else if (reg->smax_value < sval)
+ else if (reg->smax_value <= sval)
return 0;
break;
case BPF_JLT:
@@ -7324,43 +7806,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
}
if (is_null) {
reg->type = SCALAR_VALUE;
- } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
- const struct bpf_map *map = reg->map_ptr;
-
- if (map->inner_map_meta) {
- reg->type = CONST_PTR_TO_MAP;
- reg->map_ptr = map->inner_map_meta;
- } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
- reg->type = PTR_TO_XDP_SOCK;
- } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
- map->map_type == BPF_MAP_TYPE_SOCKHASH) {
- reg->type = PTR_TO_SOCKET;
- } else {
- reg->type = PTR_TO_MAP_VALUE;
- }
- } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
- reg->type = PTR_TO_SOCKET;
- } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
- reg->type = PTR_TO_SOCK_COMMON;
- } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
- reg->type = PTR_TO_TCP_SOCK;
- } else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
- reg->type = PTR_TO_BTF_ID;
- } else if (reg->type == PTR_TO_MEM_OR_NULL) {
- reg->type = PTR_TO_MEM;
- } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
- reg->type = PTR_TO_RDONLY_BUF;
- } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) {
- reg->type = PTR_TO_RDWR_BUF;
- }
- if (is_null) {
/* We don't need id and ref_obj_id from this point
* onwards anymore, thus we should better reset it,
* so that state pruning has chances to take effect.
*/
reg->id = 0;
reg->ref_obj_id = 0;
- } else if (!reg_may_point_to_spin_lock(reg)) {
+
+ return;
+ }
+
+ mark_ptr_not_null_reg(reg);
+
+ if (!reg_may_point_to_spin_lock(reg)) {
/* For not-NULL ptr, reg->ref_obj_id will be reset
* in release_reg_references().
*
@@ -7943,6 +8401,9 @@ static int check_return_code(struct bpf_verifier_env *env)
env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
range = tnum_range(1, 1);
+ if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
+ env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
+ range = tnum_range(0, 3);
break;
case BPF_PROG_TYPE_CGROUP_SKB:
if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
@@ -8588,7 +9049,11 @@ static bool range_within(struct bpf_reg_state *old,
return old->umin_value <= cur->umin_value &&
old->umax_value >= cur->umax_value &&
old->smin_value <= cur->smin_value &&
- old->smax_value >= cur->smax_value;
+ old->smax_value >= cur->smax_value &&
+ old->u32_min_value <= cur->u32_min_value &&
+ old->u32_max_value >= cur->u32_max_value &&
+ old->s32_min_value <= cur->s32_min_value &&
+ old->s32_max_value >= cur->s32_max_value;
}
/* Maximum number of register states that can exist at once */
@@ -9524,14 +9989,19 @@ static int do_check(struct bpf_verifier_env *env)
} else if (class == BPF_STX) {
enum bpf_reg_type *prev_dst_type, dst_reg_type;
- if (BPF_MODE(insn->code) == BPF_XADD) {
- err = check_xadd(env, env->insn_idx, insn);
+ if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+ err = check_atomic(env, env->insn_idx, insn);
if (err)
return err;
env->insn_idx++;
continue;
}
+ if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
+ verbose(env, "BPF_STX uses reserved fields\n");
+ return -EINVAL;
+ }
+
/* check src1 operand */
err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
@@ -9703,6 +10173,36 @@ process_bpf_exit:
return 0;
}
+static int find_btf_percpu_datasec(struct btf *btf)
+{
+ const struct btf_type *t;
+ const char *tname;
+ int i, n;
+
+ /*
+ * Both vmlinux and module each have their own ".data..percpu"
+ * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
+ * types to look at only module's own BTF types.
+ */
+ n = btf_nr_types(btf);
+ if (btf_is_module(btf))
+ i = btf_nr_types(btf_vmlinux);
+ else
+ i = 1;
+
+ for(; i < n; i++) {
+ t = btf_type_by_id(btf, i);
+ if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
+ continue;
+
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!strcmp(tname, ".data..percpu"))
+ return i;
+ }
+
+ return -ENOENT;
+}
+
/* replace pseudo btf_id with kernel symbol address */
static int check_pseudo_btf_id(struct bpf_verifier_env *env,
struct bpf_insn *insn,
@@ -9710,48 +10210,57 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
{
const struct btf_var_secinfo *vsi;
const struct btf_type *datasec;
+ struct btf_mod_pair *btf_mod;
const struct btf_type *t;
const char *sym_name;
bool percpu = false;
u32 type, id = insn->imm;
+ struct btf *btf;
s32 datasec_id;
u64 addr;
- int i;
+ int i, btf_fd, err;
- if (!btf_vmlinux) {
- verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
- return -EINVAL;
- }
-
- if (insn[1].imm != 0) {
- verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
- return -EINVAL;
+ btf_fd = insn[1].imm;
+ if (btf_fd) {
+ btf = btf_get_by_fd(btf_fd);
+ if (IS_ERR(btf)) {
+ verbose(env, "invalid module BTF object FD specified.\n");
+ return -EINVAL;
+ }
+ } else {
+ if (!btf_vmlinux) {
+ verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
+ return -EINVAL;
+ }
+ btf = btf_vmlinux;
+ btf_get(btf);
}
- t = btf_type_by_id(btf_vmlinux, id);
+ t = btf_type_by_id(btf, id);
if (!t) {
verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
- return -ENOENT;
+ err = -ENOENT;
+ goto err_put;
}
if (!btf_type_is_var(t)) {
- verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
- id);
- return -EINVAL;
+ verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
+ err = -EINVAL;
+ goto err_put;
}
- sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
+ sym_name = btf_name_by_offset(btf, t->name_off);
addr = kallsyms_lookup_name(sym_name);
if (!addr) {
verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
sym_name);
- return -ENOENT;
+ err = -ENOENT;
+ goto err_put;
}
- datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
- BTF_KIND_DATASEC);
+ datasec_id = find_btf_percpu_datasec(btf);
if (datasec_id > 0) {
- datasec = btf_type_by_id(btf_vmlinux, datasec_id);
+ datasec = btf_type_by_id(btf, datasec_id);
for_each_vsi(i, datasec, vsi) {
if (vsi->type == id) {
percpu = true;
@@ -9764,10 +10273,10 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
insn[1].imm = addr >> 32;
type = t->type;
- t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
+ t = btf_type_skip_modifiers(btf, type, NULL);
if (percpu) {
aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
- aux->btf_var.btf = btf_vmlinux;
+ aux->btf_var.btf = btf;
aux->btf_var.btf_id = type;
} else if (!btf_type_is_struct(t)) {
const struct btf_type *ret;
@@ -9775,21 +10284,54 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
u32 tsize;
/* resolve the type size of ksym. */
- ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+ ret = btf_resolve_size(btf, t, &tsize);
if (IS_ERR(ret)) {
- tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ tname = btf_name_by_offset(btf, t->name_off);
verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
tname, PTR_ERR(ret));
- return -EINVAL;
+ err = -EINVAL;
+ goto err_put;
}
aux->btf_var.reg_type = PTR_TO_MEM;
aux->btf_var.mem_size = tsize;
} else {
aux->btf_var.reg_type = PTR_TO_BTF_ID;
- aux->btf_var.btf = btf_vmlinux;
+ aux->btf_var.btf = btf;
aux->btf_var.btf_id = type;
}
+
+ /* check whether we recorded this BTF (and maybe module) already */
+ for (i = 0; i < env->used_btf_cnt; i++) {
+ if (env->used_btfs[i].btf == btf) {
+ btf_put(btf);
+ return 0;
+ }
+ }
+
+ if (env->used_btf_cnt >= MAX_USED_BTFS) {
+ err = -E2BIG;
+ goto err_put;
+ }
+
+ btf_mod = &env->used_btfs[env->used_btf_cnt];
+ btf_mod->btf = btf;
+ btf_mod->module = NULL;
+
+ /* if we reference variables from kernel module, bump its refcount */
+ if (btf_is_module(btf)) {
+ btf_mod->module = btf_try_get_module(btf);
+ if (!btf_mod->module) {
+ err = -ENXIO;
+ goto err_put;
+ }
+ }
+
+ env->used_btf_cnt++;
+
return 0;
+err_put:
+ btf_put(btf);
+ return err;
}
static int check_map_prealloc(struct bpf_map *map)
@@ -9891,15 +10433,22 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_LRU_HASH:
case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
if (!is_preallocated_map(map)) {
verbose(env,
- "Sleepable programs can only use preallocated hash maps\n");
+ "Sleepable programs can only use preallocated maps\n");
return -EINVAL;
}
break;
+ case BPF_MAP_TYPE_RINGBUF:
+ break;
default:
verbose(env,
- "Sleepable programs can only use array and hash maps\n");
+ "Sleepable programs can only use array, hash, and ringbuf maps\n");
return -EINVAL;
}
@@ -9936,13 +10485,6 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
return -EINVAL;
}
- if (BPF_CLASS(insn->code) == BPF_STX &&
- ((BPF_MODE(insn->code) != BPF_MEM &&
- BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
- verbose(env, "BPF_STX uses reserved fields\n");
- return -EINVAL;
- }
-
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_insn_aux_data *aux;
struct bpf_map *map;
@@ -10086,6 +10628,13 @@ static void release_maps(struct bpf_verifier_env *env)
env->used_map_cnt);
}
+/* drop refcnt of maps used by the rejected program */
+static void release_btfs(struct bpf_verifier_env *env)
+{
+ __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
+ env->used_btf_cnt);
+}
+
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
{
@@ -10457,6 +11006,7 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
for (i = 0; i < len; i++) {
int adj_idx = i + delta;
struct bpf_insn insn;
+ u8 load_reg;
insn = insns[adj_idx];
if (!aux[adj_idx].zext_dst) {
@@ -10499,9 +11049,27 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
if (!bpf_jit_needs_zext())
continue;
+ /* zext_dst means that we want to zero-extend whatever register
+ * the insn defines, which is dst_reg most of the time, with
+ * the notable exception of BPF_STX + BPF_ATOMIC + BPF_FETCH.
+ */
+ if (BPF_CLASS(insn.code) == BPF_STX &&
+ BPF_MODE(insn.code) == BPF_ATOMIC) {
+ /* BPF_STX + BPF_ATOMIC insns without BPF_FETCH do not
+ * define any registers, therefore zext_dst cannot be
+ * set.
+ */
+ if (WARN_ON(!(insn.imm & BPF_FETCH)))
+ return -EINVAL;
+ load_reg = insn.imm == BPF_CMPXCHG ? BPF_REG_0
+ : insn.src_reg;
+ } else {
+ load_reg = insn.dst_reg;
+ }
+
zext_patch[0] = insn;
- zext_patch[1].dst_reg = insn.dst_reg;
- zext_patch[1].src_reg = insn.dst_reg;
+ zext_patch[1].dst_reg = load_reg;
+ zext_patch[1].src_reg = load_reg;
patch = zext_patch;
patch_len = 2;
apply_patch_buffer:
@@ -10717,8 +11285,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
- if (insn->code != (BPF_JMP | BPF_CALL) ||
- insn->src_reg != BPF_PSEUDO_CALL)
+ if (!bpf_pseudo_call(insn))
continue;
/* Upon error here we cannot fall back to interpreter but
* need a hard reject of the program. Thus -EFAULT is
@@ -10759,7 +11326,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
/* BPF_PROG_RUN doesn't call subprogs directly,
* hence main prog stats include the runtime of subprogs.
* subprogs don't have IDs and not reachable via prog_get_next_id
- * func[i]->aux->stats will never be accessed and stays NULL
+ * func[i]->stats will never be accessed and stays NULL
*/
func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
if (!func[i])
@@ -10847,8 +11414,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < env->subprog_cnt; i++) {
insn = func[i]->insnsi;
for (j = 0; j < func[i]->len; j++, insn++) {
- if (insn->code != (BPF_JMP | BPF_CALL) ||
- insn->src_reg != BPF_PSEUDO_CALL)
+ if (!bpf_pseudo_call(insn))
continue;
subprog = insn->off;
insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
@@ -10893,8 +11459,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only.
*/
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
- if (insn->code != (BPF_JMP | BPF_CALL) ||
- insn->src_reg != BPF_PSEUDO_CALL)
+ if (!bpf_pseudo_call(insn))
continue;
insn->off = env->insn_aux_data[i].call_imm;
subprog = find_subprog(env, i + insn->off + 1);
@@ -10923,8 +11488,7 @@ out_undo_insn:
/* cleanup main prog to be interpreted */
prog->jit_requested = 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
- if (insn->code != (BPF_JMP | BPF_CALL) ||
- insn->src_reg != BPF_PSEUDO_CALL)
+ if (!bpf_pseudo_call(insn))
continue;
insn->off = 0;
insn->imm = env->insn_aux_data[i].call_imm;
@@ -10959,8 +11523,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return -EINVAL;
}
for (i = 0; i < prog->len; i++, insn++) {
- if (insn->code != (BPF_JMP | BPF_CALL) ||
- insn->src_reg != BPF_PSEUDO_CALL)
+ if (!bpf_pseudo_call(insn))
continue;
depth = get_callee_stack_depth(env, insn, i);
if (depth < 0)
@@ -10997,30 +11560,30 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
- struct bpf_insn mask_and_div[] = {
- BPF_MOV32_REG(insn->src_reg, insn->src_reg),
- /* Rx div 0 -> 0 */
- BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
+ bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+ struct bpf_insn *patchlet;
+ struct bpf_insn chk_and_div[] = {
+ /* [R,W]x div 0 -> 0 */
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JNE | BPF_K, insn->src_reg,
+ 0, 2, 0),
BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
*insn,
};
- struct bpf_insn mask_and_mod[] = {
- BPF_MOV32_REG(insn->src_reg, insn->src_reg),
- /* Rx mod 0 -> Rx */
- BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
+ struct bpf_insn chk_and_mod[] = {
+ /* [R,W]x mod 0 -> [R,W]x */
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, insn->src_reg,
+ 0, 1 + (is64 ? 0 : 1), 0),
*insn,
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
};
- struct bpf_insn *patchlet;
- if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
- insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
- patchlet = mask_and_div + (is64 ? 1 : 0);
- cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
- } else {
- patchlet = mask_and_mod + (is64 ? 1 : 0);
- cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
- }
+ patchlet = isdiv ? chk_and_div : chk_and_mod;
+ cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
+ ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
if (!new_prog)
@@ -11425,6 +11988,13 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
mark_reg_known_zero(env, regs, i);
else if (regs[i].type == SCALAR_VALUE)
mark_reg_unknown(env, regs, i);
+ else if (regs[i].type == PTR_TO_MEM_OR_NULL) {
+ const u32 mem_size = regs[i].mem_size;
+
+ mark_reg_known_zero(env, regs, i);
+ regs[i].mem_size = mem_size;
+ regs[i].id = ++env->id_gen;
+ }
}
} else {
/* 1st arg to a function */
@@ -12003,6 +12573,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
env->strict_alignment = false;
env->allow_ptr_leaks = bpf_allow_ptr_leaks();
+ env->allow_uninit_stack = bpf_allow_uninit_stack();
env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
@@ -12098,7 +12669,10 @@ skip_full_check:
goto err_release_maps;
}
- if (ret == 0 && env->used_map_cnt) {
+ if (ret)
+ goto err_release_maps;
+
+ if (env->used_map_cnt) {
/* if program passed verifier, update used_maps in bpf_prog_info */
env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
sizeof(env->used_maps[0]),
@@ -12112,15 +12686,29 @@ skip_full_check:
memcpy(env->prog->aux->used_maps, env->used_maps,
sizeof(env->used_maps[0]) * env->used_map_cnt);
env->prog->aux->used_map_cnt = env->used_map_cnt;
+ }
+ if (env->used_btf_cnt) {
+ /* if program passed verifier, update used_btfs in bpf_prog_aux */
+ env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
+ sizeof(env->used_btfs[0]),
+ GFP_KERNEL);
+ if (!env->prog->aux->used_btfs) {
+ ret = -ENOMEM;
+ goto err_release_maps;
+ }
+ memcpy(env->prog->aux->used_btfs, env->used_btfs,
+ sizeof(env->used_btfs[0]) * env->used_btf_cnt);
+ env->prog->aux->used_btf_cnt = env->used_btf_cnt;
+ }
+ if (env->used_map_cnt || env->used_btf_cnt) {
/* program is valid. Convert pseudo bpf_ld_imm64 into generic
* bpf_ld_imm64 instructions
*/
convert_pseudo_ld_imm64(env);
}
- if (ret == 0)
- adjust_btf_func(env);
+ adjust_btf_func(env);
err_release_maps:
if (!env->prog->aux->used_maps)
@@ -12128,6 +12716,8 @@ err_release_maps:
* them now. Otherwise free_used_maps() will release them.
*/
release_maps(env);
+ if (!env->prog->aux->used_btfs)
+ release_btfs(env);
/* extension progs temporarily inherit the attach_type of their targets
for verification purposes, so set it back to zero before returning