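samples/bpf: update tracex[23] examples to use per-cpu maps

Switch my_hist_map in tracex2 to BPF_MAP_TYPE_PERCPU_HASH and lat_map in
tracex3 to BPF_MAP_TYPE_PERCPU_ARRAY.

User space now reads an array of nr_cpus values per key (sized with
sysconf(_SC_NPROCESSORS_CONF)) and sums them to get the total;
clear_stats() in tracex3 writes a zeroed array of the same size.

In tracex3_kern.c the counter update changes from __sync_fetch_and_add()
to a plain increment, and the kprobes move from blk_mq_start_request and
blk_update_request to blk_start_request and blk_account_io_completion.
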
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index b32367c..09c1adc 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -70,7 +70,7 @@
 };
 
 struct bpf_map_def SEC("maps") my_hist_map = {
-	.type = BPF_MAP_TYPE_HASH,
+	.type = BPF_MAP_TYPE_PERCPU_HASH,
 	.key_size = sizeof(struct hist_key),
 	.value_size = sizeof(long),
 	.max_entries = 1024,
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index cd0241c..ab5b19e 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -37,6 +37,8 @@
 static void print_hist_for_pid(int fd, void *task)
 {
 	struct hist_key key = {}, next_key;
+	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	long values[nr_cpus];
 	char starstr[MAX_STARS];
 	long value;
 	long data[MAX_INDEX] = {};
@@ -49,7 +51,10 @@
 			key = next_key;
 			continue;
 		}
-		bpf_lookup_elem(fd, &next_key, &value);
+		bpf_lookup_elem(fd, &next_key, values);
+		value = 0;
+		for (i = 0; i < nr_cpus; i++)
+			value += values[i];
 		ind = next_key.index;
 		data[ind] = value;
 		if (value && ind > max_ind)
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index bf337fb..9974c3d 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c
@@ -20,7 +20,7 @@
 /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
  * example will no longer be meaningful
  */
-SEC("kprobe/blk_mq_start_request")
+SEC("kprobe/blk_start_request")
 int bpf_prog1(struct pt_regs *ctx)
 {
 	long rq = PT_REGS_PARM1(ctx);
@@ -42,13 +42,13 @@
 #define SLOTS 100
 
 struct bpf_map_def SEC("maps") lat_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
 	.key_size = sizeof(u32),
 	.value_size = sizeof(u64),
 	.max_entries = SLOTS,
 };
 
-SEC("kprobe/blk_update_request")
+SEC("kprobe/blk_account_io_completion")
 int bpf_prog2(struct pt_regs *ctx)
 {
 	long rq = PT_REGS_PARM1(ctx);
@@ -81,7 +81,7 @@
 
 	value = bpf_map_lookup_elem(&lat_map, &index);
 	if (value)
-		__sync_fetch_and_add((long *)value, 1);
+		*value += 1;
 
 	return 0;
 }
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index 0aaa933..48716f7 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -20,11 +20,13 @@
 
 static void clear_stats(int fd)
 {
+	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	__u64 values[nr_cpus];
 	__u32 key;
-	__u64 value = 0;
 
+	memset(values, 0, sizeof(values));
 	for (key = 0; key < SLOTS; key++)
-		bpf_update_elem(fd, &key, &value, BPF_ANY);
+		bpf_update_elem(fd, &key, values, BPF_ANY);
 }
 
 const char *color[] = {
@@ -75,15 +77,20 @@
 
 static void print_hist(int fd)
 {
-	__u32 key;
-	__u64 value;
-	__u64 cnt[SLOTS];
-	__u64 max_cnt = 0;
+	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
 	__u64 total_events = 0;
+	long values[nr_cpus];
+	__u64 max_cnt = 0;
+	__u64 cnt[SLOTS];
+	__u64 value;
+	__u32 key;
+	int i;
 
 	for (key = 0; key < SLOTS; key++) {
+		bpf_lookup_elem(fd, &key, values);
 		value = 0;
-		bpf_lookup_elem(fd, &key, &value);
+		for (i = 0; i < nr_cpus; i++)
+			value += values[i];
 		cnt[key] = value;
 		total_events += value;
 		if (value > max_cnt)