From 0d5ddd14a8e67d35fad79caf479cca54a6788cc9 Mon Sep 17 00:00:00 2001 From: "Jon Medhurst (Tixy)" Date: Fri, 2 Aug 2013 18:45:33 +0100 Subject: HMP: Check the system has little cpus before forcing rt tasks onto them It is sometimes desirable to run a kernel with HMP scheduling enabled on a system which is not big.LITTLE, e.g. when building a multi-platform kernel, or when testing a big.LITTLE system with one cluster disabled. We should therefore allow for the situation where there is no little domain. Signed-off-by: Jon Medhurst Signed-off-by: Mark Brown --- kernel/sched/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 50d9e9849ce..fb9b7b74a83 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3841,8 +3841,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) if (rt_prio(p->prio)) { p->sched_class = &rt_sched_class; #ifdef CONFIG_SCHED_HMP - if (cpumask_equal(&p->cpus_allowed, cpu_all_mask)) - do_set_cpus_allowed(p, &hmp_slow_cpu_mask); + if (!cpumask_empty(&hmp_slow_cpu_mask)) + if (cpumask_equal(&p->cpus_allowed, cpu_all_mask)) + do_set_cpus_allowed(p, &hmp_slow_cpu_mask); #endif } else -- cgit v1.2.3 From 83a3cdb6d37f700761be84a333d574709c5a0d2a Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 8 Aug 2013 16:27:34 +0100 Subject: HMP: select 'best' task for migration rather than 'current' When we are looking for a task to migrate up, select the heaviest one in the first 5 runnable on the runqueue. Likewise, when looking for a task to offload, select the lightest one in the first 5 runnable on the runqueue. Ensure the selected task is runnable in the target domain. This change is necessary in order to implement idle pull in a sensible manner, but it is used here in up-migration and offload to select the correct target task. Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c849d68a9b7..81579411348 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3512,6 +3512,7 @@ done: * fastest domain first. */ DEFINE_PER_CPU(struct hmp_domain *, hmp_cpu_domain); +static const int hmp_max_tasks = 5; extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list); @@ -3576,6 +3577,78 @@ static void hmp_offline_cpu(int cpu) if(domain) cpumask_clear_cpu(cpu, &domain->cpus); } +/* + * Needed to determine heaviest tasks etc. 
+ */ +static inline unsigned int hmp_cpu_is_fastest(int cpu); +static inline unsigned int hmp_cpu_is_slowest(int cpu); +static inline struct hmp_domain *hmp_slower_domain(int cpu); +static inline struct hmp_domain *hmp_faster_domain(int cpu); + +/* must hold runqueue lock for queue se is currently on */ +static struct sched_entity *hmp_get_heaviest_task( + struct sched_entity *se, int migrate_up) +{ + int num_tasks = hmp_max_tasks; + struct sched_entity *max_se = se; + unsigned long int max_ratio = se->avg.load_avg_ratio; + const struct cpumask *hmp_target_mask = NULL; + + if (migrate_up) { + struct hmp_domain *hmp; + if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq))) + return max_se; + + hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq)); + hmp_target_mask = &hmp->cpus; + } + + while (num_tasks && se) { + if (entity_is_task(se) && + (se->avg.load_avg_ratio > max_ratio && + hmp_target_mask && + cpumask_intersects(hmp_target_mask, + tsk_cpus_allowed(task_of(se))))) { + max_se = se; + max_ratio = se->avg.load_avg_ratio; + } + se = __pick_next_entity(se); + num_tasks--; + } + return max_se; +} + +static struct sched_entity *hmp_get_lightest_task( + struct sched_entity *se, int migrate_down) +{ + int num_tasks = hmp_max_tasks; + struct sched_entity *min_se = se; + unsigned long int min_ratio = se->avg.load_avg_ratio; + const struct cpumask *hmp_target_mask = NULL; + + if (migrate_down) { + struct hmp_domain *hmp; + if (hmp_cpu_is_slowest(cpu_of(se->cfs_rq->rq))) + return min_se; + + hmp = hmp_slower_domain(cpu_of(se->cfs_rq->rq)); + hmp_target_mask = &hmp->cpus; + } + + while (num_tasks && se) { + if (entity_is_task(se) && + (se->avg.load_avg_ratio < min_ratio && + hmp_target_mask && + cpumask_intersects(hmp_target_mask, + tsk_cpus_allowed(task_of(se))))) { + min_se = se; + min_ratio = se->avg.load_avg_ratio; + } + se = __pick_next_entity(se); + num_tasks--; + } + return min_se; +} /* * Migration thresholds should be in the range [0..1023] @@ -3665,7 +3738,15 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk, int cpu) { int lowest_cpu=NR_CPUS; - __always_unused int lowest_ratio = hmp_domain_min_load(hmp_slower_domain(cpu), &lowest_cpu); + struct hmp_domain *hmp; + __always_unused int lowest_ratio; + + if (hmp_cpu_is_slowest(cpu)) + hmp = hmp_cpu_domain(cpu); + else + hmp = hmp_slower_domain(cpu); + + lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu); /* * If the lowest-loaded CPU in the domain is allowed by the task affinity * select that one, otherwise select one which is allowed @@ -6585,7 +6666,7 @@ static DEFINE_SPINLOCK(hmp_force_migration); static void hmp_force_up_migration(int this_cpu) { int cpu, target_cpu; - struct sched_entity *curr; + struct sched_entity *curr, *orig; struct rq *target; unsigned long flags; unsigned int force; @@ -6611,6 +6692,8 @@ static void hmp_force_up_migration(int this_cpu) cfs_rq = group_cfs_rq(curr); } } + orig = curr; + curr = hmp_get_heaviest_task(curr, 1); p = task_of(curr); if (hmp_up_migration(cpu, &target_cpu, curr)) { if (!target->active_balance) { @@ -6628,6 +6711,7 @@ static void hmp_force_up_migration(int this_cpu) * Selecting the lightest task for offloading will * require extensive book keeping. 
*/ + curr = hmp_get_lightest_task(orig, 1); target->push_cpu = hmp_offload_down(cpu, curr); if (target->push_cpu < NR_CPUS) { target->active_balance = 1; -- cgit v1.2.3 From c05cd3079d0dd31ee5391a2a5c036fdecc67a136 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 6 Aug 2013 16:14:19 +0100 Subject: sched: HMP fix traversing the rb-tree from the curr pointer The hmp_get_{lightest,heaviest}_task() need to use __pick_first_entity() to get a pointer to a sched_entity on the rq. The current is not kept on the rq while running, so its rb-tree node pointers are no longer valid. Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 81579411348..b801eb0330e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3602,6 +3602,8 @@ static struct sched_entity *hmp_get_heaviest_task( hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq)); hmp_target_mask = &hmp->cpus; } + /* The currently running task is not on the runqueue */ + se = __pick_first_entity(cfs_rq_of(se)); while (num_tasks && se) { if (entity_is_task(se) && @@ -3630,10 +3632,11 @@ static struct sched_entity *hmp_get_lightest_task( struct hmp_domain *hmp; if (hmp_cpu_is_slowest(cpu_of(se->cfs_rq->rq))) return min_se; - hmp = hmp_slower_domain(cpu_of(se->cfs_rq->rq)); hmp_target_mask = &hmp->cpus; } + /* The currently running task is not on the runqueue */ + se = __pick_first_entity(cfs_rq_of(se)); while (num_tasks && se) { if (entity_is_task(se) && -- cgit v1.2.3 From add684211e0ff4a08f419f6547fc311a72c35391 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 8 Aug 2013 16:31:07 +0100 Subject: sched: track per-rq 'last migration time' Track when migrations were performed to runqueues. Use this to decide between runqueues as migration targets when run queues in an hmp domain have equal load. Intention is to spread migration load amongst CPUs more fairly. When all CPUs in an hmp domain are fully loaded, the existing code always selects the last CPU as a migration target - this is unfair and little better than doing no selection. 
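For illustration, here is a minimal user-space sketch of the tie-breaking rule described above; the struct, helper name and sample values are hypothetical, not the kernel code:

#include <stdio.h>

struct cpu_stat {
	unsigned long load;		/* runnable load contribution */
	unsigned long long last_mig;	/* time of last up/down migration */
};

/* Prefer the least-loaded CPU; on a tie, prefer the one that has
 * gone longest without a migration (smallest timestamp). */
static int pick_target(const struct cpu_stat *cs, int ncpus)
{
	unsigned long min_load = ~0UL;
	unsigned long long oldest = ~0ULL;
	int best = -1, cpu;

	for (cpu = 0; cpu < ncpus; cpu++) {
		if (cs[cpu].load < min_load ||
		    (cs[cpu].load == min_load && cs[cpu].last_mig < oldest)) {
			min_load = cs[cpu].load;
			oldest = cs[cpu].last_mig;
			best = cpu;
		}
	}
	return best;
}

int main(void)
{
	/* three fully loaded CPUs; CPU 1 was disturbed least recently */
	struct cpu_stat cs[] = { { 1023, 2000 }, { 1023, 1000 }, { 1023, 3000 } };

	printf("target cpu: %d\n", pick_target(cs, 3));	/* prints 1 */
	return 0;
}

With equal loads everywhere, the CPU whose last recorded migration is oldest wins, which is what spreads migrations across the members of a fully loaded domain.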
Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b801eb0330e..62302a37279 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3764,17 +3764,21 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk, static inline void hmp_next_up_delay(struct sched_entity *se, int cpu) { struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs; - - se->avg.hmp_last_up_migration = cfs_rq_clock_task(cfs_rq); + u64 now = cfs_rq_clock_task(cfs_rq); + se->avg.hmp_last_up_migration = now; se->avg.hmp_last_down_migration = 0; + cpu_rq(cpu)->avg.hmp_last_up_migration = now; + cpu_rq(cpu)->avg.hmp_last_down_migration = 0; } static inline void hmp_next_down_delay(struct sched_entity *se, int cpu) { struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs; - - se->avg.hmp_last_down_migration = cfs_rq_clock_task(cfs_rq); + u64 now = cfs_rq_clock_task(cfs_rq); + se->avg.hmp_last_down_migration = now; se->avg.hmp_last_up_migration = 0; + cpu_rq(cpu)->avg.hmp_last_down_migration = now; + cpu_rq(cpu)->avg.hmp_last_up_migration = 0; } #ifdef CONFIG_HMP_VARIABLE_SCALE @@ -3946,15 +3950,37 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, { int cpu; int min_cpu_runnable_temp = NR_CPUS; + u64 min_target_last_migration = ULLONG_MAX; + u64 curr_last_migration; unsigned long min_runnable_load = INT_MAX; - unsigned long contrib; + unsigned long scaled_min_runnable_load = INT_MAX; + unsigned long contrib, scaled_contrib; + struct sched_avg *avg; for_each_cpu_mask(cpu, hmpd->cpus) { + avg = &cpu_rq(cpu)->avg; + /* used for both up and down migration */ + curr_last_migration = avg->hmp_last_up_migration ? + avg->hmp_last_up_migration : avg->hmp_last_down_migration; + /* don't use the divisor in the loop, just at the end */ - contrib = cpu_rq(cpu)->avg.runnable_avg_sum * scale_load_down(1024); - if (contrib < min_runnable_load) { + contrib = avg->runnable_avg_sum * scale_load_down(1024); + scaled_contrib = contrib >> 22; + + if ((contrib < min_runnable_load) || + (scaled_contrib == scaled_min_runnable_load && + curr_last_migration < min_target_last_migration)) { + /* + * if the load is the same target the CPU with + * the longest time since a migration. + * This is to spread migration load between + * members of a domain more evenly when the + * domain is fully loaded + */ min_runnable_load = contrib; + scaled_min_runnable_load = scaled_contrib; min_cpu_runnable_temp = cpu; + min_target_last_migration = curr_last_migration; } } -- cgit v1.2.3 From e580deb7feb9cd3bdf4c8e323ea84f22cd8c7c8a Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 8 Aug 2013 16:32:31 +0100 Subject: HMP: Modify the runqueue stats to add a new child stat The original intent here was to track unweighted runqueue load with less resolution so we could use the least-recently-disturbed runqueue to choose between 'closely related' load levels. However, after experimenting with the resolution it turns out that the following algorithm is highly beneficial for mobile workloads. In hmp_domain_min_load: * If any CPU is zero, the overall load is zero * If no CPUs are idle, the domain is 'fully loaded' Additionally, the time since last migration count is used to discriminate between idle CPUs. 
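As a rough stand-alone sketch of this policy (the helper name and sample loads are made up, not the kernel implementation), the per-CPU load collapses to a binary idle/busy signal before the domain minimum is taken:

#include <stdio.h>

/* Any non-zero load makes a runqueue look fully busy (1023); only a
 * truly idle one reports 0, so the domain minimum is 0 exactly when
 * some CPU in the domain is idle. */
static unsigned int effective_load(unsigned long load_avg_ratio)
{
	return load_avg_ratio ? 1023 : 0;
}

int main(void)
{
	unsigned long loads[] = { 37, 512, 0, 900 };	/* CPU 2 is idle */
	unsigned int cpu, min = 1023;

	for (cpu = 0; cpu < 4; cpu++)
		if (effective_load(loads[cpu]) < min)
			min = effective_load(loads[cpu]);

	printf("domain min load: %u\n", min);	/* prints 0 */
	return 0;
}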
Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 62302a37279..c3ba73750d6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1582,9 +1582,10 @@ static inline void __update_task_entity_contrib(struct sched_entity *se) } /* Compute the current contribution to load_avg by se, return any delta */ -static long __update_entity_load_avg_contrib(struct sched_entity *se) +static long __update_entity_load_avg_contrib(struct sched_entity *se, long *ratio) { long old_contrib = se->avg.load_avg_contrib; + long old_ratio = se->avg.load_avg_ratio; if (entity_is_task(se)) { __update_task_entity_contrib(se); @@ -1593,6 +1594,8 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se) __update_group_entity_contrib(se); } + if (ratio) + *ratio = se->avg.load_avg_ratio - old_ratio; return se->avg.load_avg_contrib - old_contrib; } @@ -1612,7 +1615,7 @@ static inline void update_entity_load_avg(struct sched_entity *se, int update_cfs_rq) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - long contrib_delta; + long contrib_delta, ratio_delta; u64 now; int cpu = -1; /* not used in normal case */ @@ -1632,15 +1635,17 @@ static inline void update_entity_load_avg(struct sched_entity *se, cfs_rq->curr == se, cpu)) return; - contrib_delta = __update_entity_load_avg_contrib(se); + contrib_delta = __update_entity_load_avg_contrib(se, &ratio_delta); if (!update_cfs_rq) return; - if (se->on_rq) + if (se->on_rq) { cfs_rq->runnable_load_avg += contrib_delta; - else + rq_of(cfs_rq)->avg.load_avg_ratio += ratio_delta; + } else { subtract_blocked_load_contrib(cfs_rq, -contrib_delta); + } } /* @@ -1673,7 +1678,6 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) static inline void update_rq_runnable_avg(struct rq *rq, int runnable) { - u32 contrib; int cpu = -1; /* not used in normal case */ #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE @@ -1682,9 +1686,7 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable) __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable, runnable, cpu); __update_tg_runnable_avg(&rq->avg, &rq->cfs); - contrib = rq->avg.runnable_avg_sum * scale_load_down(1024); - contrib /= (rq->avg.runnable_avg_period + 1); - trace_sched_rq_runnable_ratio(cpu_of(rq), scale_load(contrib)); + trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio); trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg); } @@ -1727,6 +1729,8 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, } cfs_rq->runnable_load_avg += se->avg.load_avg_contrib; + rq_of(cfs_rq)->avg.load_avg_ratio += se->avg.load_avg_ratio; + /* we force update consideration on load-balancer moves */ update_cfs_rq_blocked_load(cfs_rq, !wakeup); } @@ -1745,6 +1749,8 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, update_cfs_rq_blocked_load(cfs_rq, !sleep); cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib; + rq_of(cfs_rq)->avg.load_avg_ratio -= se->avg.load_avg_ratio; + if (sleep) { cfs_rq->blocked_load_avg += se->avg.load_avg_contrib; se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter); @@ -3964,8 +3970,8 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, avg->hmp_last_up_migration : avg->hmp_last_down_migration; /* don't use the divisor in the loop, just at the end */ - contrib = 
avg->runnable_avg_sum * scale_load_down(1024); - scaled_contrib = contrib >> 22; + contrib = avg->load_avg_ratio * scale_load_down(1024); + scaled_contrib = contrib >> 13; if ((contrib < min_runnable_load) || (scaled_contrib == scaled_min_runnable_load && @@ -3988,7 +3994,9 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, *min_cpu = min_cpu_runnable_temp; /* domain will often have at least one empty CPU */ - return min_runnable_load ? min_runnable_load / (LOAD_AVG_MAX + 1) : 0; + trace_printk("hmp_domain_min_load returning %lu\n", + min_runnable_load > 1023 ? 1023 : min_runnable_load); + return min_runnable_load > 1023 ? 1023 : min_runnable_load; } /* -- cgit v1.2.3 From 72d74c11966159f155bac7f83fe77658c54525fc Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 15 Jul 2013 16:06:44 +0100 Subject: HMP: Explicitly implement all-load-is-max-load policy for HMP targets Experimentally, one of the best policies for HMP migration CPU selection is to completely ignore part-loaded CPUs and only look for idle ones. If there are no idle ones, we will choose the one which was least-recently-disturbed. Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c3ba73750d6..78f6d028d29 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3959,8 +3959,7 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, u64 min_target_last_migration = ULLONG_MAX; u64 curr_last_migration; unsigned long min_runnable_load = INT_MAX; - unsigned long scaled_min_runnable_load = INT_MAX; - unsigned long contrib, scaled_contrib; + unsigned long contrib; struct sched_avg *avg; for_each_cpu_mask(cpu, hmpd->cpus) { @@ -3969,12 +3968,17 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, curr_last_migration = avg->hmp_last_up_migration ? avg->hmp_last_up_migration : avg->hmp_last_down_migration; - /* don't use the divisor in the loop, just at the end */ - contrib = avg->load_avg_ratio * scale_load_down(1024); - scaled_contrib = contrib >> 13; + contrib = avg->load_avg_ratio; + /* + * Consider a runqueue completely busy if there is any load + * on it. Definitely not the best for overall fairness, but + * does well in typical Android use cases. + */ + if (contrib) + contrib = 1023; if ((contrib < min_runnable_load) || - (scaled_contrib == scaled_min_runnable_load && + (contrib == min_runnable_load && curr_last_migration < min_target_last_migration)) { /* * if the load is the same target the CPU with @@ -3984,7 +3988,6 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, * domain is fully loaded */ min_runnable_load = contrib; - scaled_min_runnable_load = scaled_contrib; min_cpu_runnable_temp = cpu; min_target_last_migration = curr_last_migration; } @@ -3993,10 +3996,7 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, if (min_cpu) *min_cpu = min_cpu_runnable_temp; - /* domain will often have at least one empty CPU */ - trace_printk("hmp_domain_min_load returning %lu\n", - min_runnable_load > 1023 ? 1023 : min_runnable_load); - return min_runnable_load > 1023 ? 1023 : min_runnable_load; + return min_runnable_load; } /* @@ -4023,10 +4023,9 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se) if (hmp_cpu_is_slowest(cpu)) return NR_CPUS; - /* Is the current domain fully loaded? 
*/ - /* load < ~50% */ + /* Is there an idle CPU in the current domain */ min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL); - if (min_usage < (NICE_0_LOAD>>1)) + if (min_usage == 0) return NR_CPUS; /* Is the task alone on the cpu? */ @@ -4038,10 +4037,9 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se) if (hmp_task_starvation(se) > 768) return NR_CPUS; - /* Does the slower domain have spare cycles? */ + /* Does the slower domain have any idle CPUs? */ min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu); - /* load > 50% */ - if (min_usage > NICE_0_LOAD/2) + if (min_usage > 0) return NR_CPUS; if (cpumask_test_cpu(dest_cpu, &hmp_slower_domain(cpu)->cpus)) @@ -6501,9 +6499,11 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti < hmp_next_up_threshold) return 0; - /* Target domain load < 94% */ - if (hmp_domain_min_load(hmp_faster_domain(cpu), target_cpu) - > NICE_0_LOAD-64) + /* hmp_domain_min_load only returns 0 for an + * idle CPU or 1023 for any partly-busy one. + * Be explicit about requirement for an idle CPU. + */ + if (hmp_domain_min_load(hmp_faster_domain(cpu), target_cpu) != 0) return 0; if (cpumask_intersects(&hmp_faster_domain(cpu)->cpus, -- cgit v1.2.3 From 70845269b55dd275c19d253bbb79b36fbf9a83d6 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 8 Aug 2013 16:10:39 +0100 Subject: sched: HMP change nr_running offload metric rq->nr_running was better than cfs.nr_running, since it includes all tasks actually on the CPU. However, it includes RT tasks which we would rather ignore at this point. Switching to cfs.h_nr_running includes all the CFS tasks but no RT tasks. Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 78f6d028d29..c7c41412f5e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4029,7 +4029,7 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se) return NR_CPUS; /* Is the task alone on the cpu? */ - if (cpu_rq(cpu)->cfs.nr_running < 2) + if (cpu_rq(cpu)->cfs.h_nr_running < 2) return NR_CPUS; /* Is the task actually starving? */ -- cgit v1.2.3 From 1325a370daa4878e3153e877a68d29a0ab308d3b Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 8 Aug 2013 16:41:26 +0100 Subject: HMP: Implement idle pull for HMP When an A15 goes idle, we should up-migrate anything which is above the threshold and running on an A7. Reuses the HMP force-migration spinlock, but adds its own new cpu stopper client. Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c7c41412f5e..afd76bf9433 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6003,7 +6003,9 @@ out_one_pinned: out: return ld_moved; } - +#ifdef CONFIG_SCHED_HMP +static unsigned int hmp_idle_pull(int this_cpu); +#endif /* * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. 
@@ -6048,7 +6050,10 @@ void idle_balance(int this_cpu, struct rq *this_rq) } } rcu_read_unlock(); - +#ifdef CONFIG_SCHED_HMP + if (!pulled_task) + pulled_task = hmp_idle_pull(this_cpu); +#endif raw_spin_lock(&this_rq->lock); if (pulled_task || time_after(jiffies, this_rq->next_balance)) { @@ -6694,6 +6699,79 @@ out_unlock: return 0; } +/* + * hmp_idle_pull_cpu_stop is run by cpu stopper and used to + * migrate a specific task from one runqueue to another. + * hmp_idle_pull uses this to push a currently running task + * off a runqueue to a faster CPU. + * Locking is slightly different than usual. + * Based on active_load_balance_stop_cpu and can potentially be merged. + */ +static int hmp_idle_pull_cpu_stop(void *data) +{ + struct rq *busiest_rq = data; + struct task_struct *p = busiest_rq->migrate_task; + int busiest_cpu = cpu_of(busiest_rq); + int target_cpu = busiest_rq->push_cpu; + struct rq *target_rq = cpu_rq(target_cpu); + struct sched_domain *sd; + + raw_spin_lock_irq(&busiest_rq->lock); + + /* make sure the requested cpu hasn't gone down in the meantime */ + if (unlikely(busiest_cpu != smp_processor_id() || + !busiest_rq->active_balance)) + goto out_unlock; + + /* Is there any task to move? */ + if (busiest_rq->nr_running <= 1) + goto out_unlock; + + /* Task has migrated meanwhile, abort forced migration */ + if (task_rq(p) != busiest_rq) + goto out_unlock; + + /* + * This condition is "impossible", if it occurs + * we need to fix it. Originally reported by + * Bjorn Helgaas on a 128-cpu setup. + */ + BUG_ON(busiest_rq == target_rq); + + /* move a task from busiest_rq to target_rq */ + double_lock_balance(busiest_rq, target_rq); + + /* Search for an sd spanning us and the target CPU. */ + rcu_read_lock(); + for_each_domain(target_cpu, sd) { + if (cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) + break; + } + if (likely(sd)) { + struct lb_env env = { + .sd = sd, + .dst_cpu = target_cpu, + .dst_rq = target_rq, + .src_cpu = busiest_rq->cpu, + .src_rq = busiest_rq, + .idle = CPU_IDLE, + }; + + schedstat_inc(sd, alb_count); + + if (move_specific_task(&env, p)) + schedstat_inc(sd, alb_pushed); + else + schedstat_inc(sd, alb_failed); + } + rcu_read_unlock(); + double_unlock_balance(busiest_rq, target_rq); +out_unlock: + busiest_rq->active_balance = 0; + raw_spin_unlock_irq(&busiest_rq->lock); + return 0; +} + static DEFINE_SPINLOCK(hmp_force_migration); /* @@ -6766,6 +6844,86 @@ static void hmp_force_up_migration(int this_cpu) } spin_unlock(&hmp_force_migration); } +/* + * hmp_idle_pull looks at little domain runqueues to see + * if a task should be pulled. + * + * Reuses hmp_force_migration spinlock. 
+ * + */ +static unsigned int hmp_idle_pull(int this_cpu) +{ + int cpu; + struct sched_entity *curr, *orig; + struct hmp_domain *hmp_domain = NULL; + struct rq *target, *rq; + unsigned long flags, ratio = 0; + unsigned int force = 0; + struct task_struct *p = NULL; + + if (!hmp_cpu_is_slowest(this_cpu)) + hmp_domain = hmp_slower_domain(this_cpu); + if (!hmp_domain) + return 0; + + if (!spin_trylock(&hmp_force_migration)) + return 0; + + /* first select a task */ + for_each_cpu(cpu, &hmp_domain->cpus) { + rq = cpu_rq(cpu); + raw_spin_lock_irqsave(&rq->lock, flags); + curr = rq->cfs.curr; + if (!curr) { + raw_spin_unlock_irqrestore(&rq->lock, flags); + continue; + } + if (!entity_is_task(curr)) { + struct cfs_rq *cfs_rq; + + cfs_rq = group_cfs_rq(curr); + while (cfs_rq) { + curr = cfs_rq->curr; + if (!entity_is_task(curr)) + cfs_rq = group_cfs_rq(curr); + else + cfs_rq = NULL; + } + } + orig = curr; + curr = hmp_get_heaviest_task(curr, 1); + if (curr->avg.load_avg_ratio > hmp_up_threshold && + curr->avg.load_avg_ratio > ratio) { + p = task_of(curr); + target = rq; + ratio = curr->avg.load_avg_ratio; + } + raw_spin_unlock_irqrestore(&rq->lock, flags); + } + + if (!p) + goto done; + + /* now we have a candidate */ + raw_spin_lock_irqsave(&target->lock, flags); + if (!target->active_balance && task_rq(p) == target) { + target->active_balance = 1; + target->push_cpu = this_cpu; + target->migrate_task = p; + force = 1; + trace_sched_hmp_migrate(p, target->push_cpu, 3); + hmp_next_up_delay(&p->se, target->push_cpu); + } + raw_spin_unlock_irqrestore(&target->lock, flags); + if (force) { + stop_one_cpu_nowait(cpu_of(target), + hmp_idle_pull_cpu_stop, + target, &target->active_balance_work); + } +done: + spin_unlock(&hmp_force_migration); + return force; +} #else static void hmp_force_up_migration(int this_cpu) { } #endif /* CONFIG_SCHED_HMP */ -- cgit v1.2.3 From 0d520ee8d4e910d1400e1b21608aff3bbce7ad6f Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 22 Jul 2013 15:56:28 +0100 Subject: HMP: Access runqueue task clocks directly. Avoids accesses through cfs_rq going bad when the cpu_rq doesn't have a cfs member. 
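A loose user-space analogue of the idea (assumed names, not the kernel API): take every migration timestamp from one shared monotonic clock, so that a delta computed on any CPU stays comparable regardless of where the stamp was recorded:

#include <stdio.h>
#include <time.h>

/* One shared monotonic clock for all timestamps. */
static unsigned long long ref_clock_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

int main(void)
{
	unsigned long long last_migration = ref_clock_ns();
	unsigned long long now = ref_clock_ns();

	/* same >>10 scaling as the patch, roughly ns -> us */
	printf("settled for ~%llu us\n", (now - last_migration) >> 10);
	return 0;
}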
Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index afd76bf9433..bfd27e89399 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3769,8 +3769,8 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk, static inline void hmp_next_up_delay(struct sched_entity *se, int cpu) { - struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs; - u64 now = cfs_rq_clock_task(cfs_rq); + /* hack - always use clock from first online CPU */ + u64 now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task; se->avg.hmp_last_up_migration = now; se->avg.hmp_last_down_migration = 0; cpu_rq(cpu)->avg.hmp_last_up_migration = now; @@ -3779,8 +3779,8 @@ static inline void hmp_next_up_delay(struct sched_entity *se, int cpu) static inline void hmp_next_down_delay(struct sched_entity *se, int cpu) { - struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs; - u64 now = cfs_rq_clock_task(cfs_rq); + /* hack - always use clock from first online CPU */ + u64 now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task; se->avg.hmp_last_down_migration = now; se->avg.hmp_last_up_migration = 0; cpu_rq(cpu)->avg.hmp_last_down_migration = now; @@ -6481,7 +6481,6 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { } static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se) { struct task_struct *p = task_of(se); - struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs; u64 now; if (target_cpu) @@ -6499,7 +6498,8 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti return 0; /* Let the task load settle before doing another up migration */ - now = cfs_rq_clock_task(cfs_rq); + /* hack - always use clock from first online CPU */ + now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task; if (((now - se->avg.hmp_last_up_migration) >> 10) < hmp_next_up_threshold) return 0; @@ -6522,7 +6522,6 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti static unsigned int hmp_down_migration(int cpu, struct sched_entity *se) { struct task_struct *p = task_of(se); - struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs; u64 now; if (hmp_cpu_is_slowest(cpu)) @@ -6538,7 +6537,8 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se) #endif /* Let the task load settle before doing another down migration */ - now = cfs_rq_clock_task(cfs_rq); + /* hack - always use clock from first online CPU */ + now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task; if (((now - se->avg.hmp_last_down_migration) >> 10) < hmp_next_down_threshold) return 0; -- cgit v1.2.3 From c2111520cfc682629fd75d0741ffee78b3b940ab Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Tue, 23 Jul 2013 14:56:45 +0100 Subject: HMP: Update migration timer when we fork-migrate Prevents fork-migration adversely interacting with normal migration (i.e. 
runqueues containing forked tasks being selected as migration targets when there is a better choice available) Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bfd27e89399..754634e774a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4079,18 +4079,26 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) if(hmp_cpu_is_fastest(prev_cpu)) { struct hmp_domain *hmpdom = list_entry(&hmp_cpu_domain(prev_cpu)->hmp_domains, struct hmp_domain, hmp_domains); __always_unused int lowest_ratio = hmp_domain_min_load(hmpdom, &new_cpu); - if(new_cpu != NR_CPUS && cpumask_test_cpu(new_cpu,tsk_cpus_allowed(p))) + if (new_cpu != NR_CPUS && + cpumask_test_cpu(new_cpu, + tsk_cpus_allowed(p))) { + hmp_next_up_delay(&p->se, new_cpu); return new_cpu; - else { - new_cpu = cpumask_any_and(&hmp_faster_domain(cpu)->cpus, + } else { + new_cpu = cpumask_any_and( + &hmp_faster_domain(cpu)->cpus, tsk_cpus_allowed(p)); - if(new_cpu < nr_cpu_ids) + if (new_cpu < nr_cpu_ids) { + hmp_next_up_delay(&p->se, new_cpu); return new_cpu; + } } } else { new_cpu = hmp_select_faster_cpu(p, prev_cpu); - if (new_cpu != NR_CPUS) + if (new_cpu != NR_CPUS) { + hmp_next_up_delay(&p->se, new_cpu); return new_cpu; + } } } #endif -- cgit v1.2.3 From f4cc06ac2d9fc7a483eb778abec66f60487687f4 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 1 Aug 2013 22:32:07 -0700 Subject: htb: fix sign extension bug [ Upstream commit cbd375567f7e4811b1c721f75ec519828ac6583f ] When userspace passes a large priority value, the assignment of the unsigned value hopt->prio to the signed int cl->prio causes cl->prio to become negative, and the comparison with TC_HTB_NUMPRIO is always false. The result is that HTB crashes by referencing outside the array when processing packets. With this patch the large value wraps around like other values outside the normal range. See: https://bugzilla.kernel.org/show_bug.cgi?id=60669 Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_htb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index adaedd79389..5c9f0b7b210 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -87,7 +87,7 @@ struct htb_class { unsigned int children; struct htb_class *parent; /* parent class */ - int prio; /* these two are used only by leaves... */ + u32 prio; /* these two are used only by leaves... */ int quantum; /* but stored for parent-to-leaf return */ union { -- cgit v1.2.3 From b4f55925ace5646d72df5af6fdab74fdbb288229 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Aug 2013 11:32:43 +0200 Subject: net: rtm_to_ifaddr: free ifa if ifa_cacheinfo processing fails [ Upstream commit 446266b0c742a2c9ee8f0dce759a0117bce58a86 ] Commit 5c766d642 ("ipv4: introduce address lifetime") leaves the ifa resource that was allocated via inet_alloc_ifa() unfreed when returning from the function with -EINVAL. Thus, free it first via inet_free_ifa(). Signed-off-by: Daniel Borkmann Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/devinet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dfc39d4d48b..9e38217c393 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -771,7 +771,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { err = -EINVAL; - goto errout; + goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; @@ -779,6 +779,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, return ifa; +errout_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } -- cgit v1.2.3 From 4691236cedfb12e2644f4c84b4b14a6882e1fd7b Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 2 Aug 2013 18:36:40 +0400 Subject: net: check net.core.somaxconn sysctl values [ Upstream commit 5f671d6b4ec3e6d66c2a868738af2cdea09e7509 ] It's possible to assign an invalid value to the net.core.somaxconn sysctl variable, because there are no checks at all. The sk_max_ack_backlog field of the sock structure is defined as unsigned short. Therefore, the backlog argument in inet_listen() shouldn't exceed USHRT_MAX. The backlog argument in the listen() syscall is truncated to the somaxconn value. So, the somaxconn value shouldn't exceed 65535 (USHRT_MAX). Also, negative values of somaxconn are meaningless. before: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 net.core.somaxconn = 65536 $ sysctl -w net.core.somaxconn=-100 net.core.somaxconn = -100 after: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 error: "Invalid argument" setting key "net.core.somaxconn" $ sysctl -w net.core.somaxconn=-100 error: "Invalid argument" setting key "net.core.somaxconn" Based on a prior patch from Changli Gao. Signed-off-by: Roman Gushchin Reported-by: Changli Gao Suggested-by: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sysctl_net_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cfdb46ab3a7..2ff093b7c45 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -20,7 +20,9 @@ #include #include +static int zero = 0; static int one = 1; +static int ushort_max = USHRT_MAX; #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(ctl_table *table, int write, @@ -204,7 +206,9 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .extra1 = &zero, + .extra2 = &ushort_max, + .proc_handler = proc_dointvec_minmax }, { } }; -- cgit v1.2.3 From 3b8dd03efa3008933d14e7203b7b29200330f039 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 5 Aug 2013 18:25:54 +0300 Subject: macvlan: validate flags [ Upstream commit 1512747820367c8b3b8b72035f0f78c62f2bf1e9 ] commit df8ef8f3aaa6692970a436204c4429210addb23a macvlan: add FDB bridge ops and macvlan flags added a flags field to macvlan, which can be controlled from userspace. The idea is to make the interface future-proof so we can add flags and not new fields. However, the flags value isn't validated; as a result, userspace can't detect which flags are supported. Signed-off-by: Michael S. Tsirkin Cc: "David S. 
Miller" Cc: John Fastabend Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvlan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 6e91931a1c2..06eba6e480c 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -727,6 +727,10 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) return -EADDRNOTAVAIL; } + if (data && data[IFLA_MACVLAN_FLAGS] && + nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~MACVLAN_FLAG_NOPROMISC) + return -EINVAL; + if (data && data[IFLA_MACVLAN_MODE]) { switch (nla_get_u32(data[IFLA_MACVLAN_MODE])) { case MACVLAN_MODE_PRIVATE: -- cgit v1.2.3 From 5cf1ad6c6a29186821843e5d6f70ebbbce587f4f Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 2 Aug 2013 19:07:38 +0200 Subject: neighbour: populate neigh_parms on alloc before calling ndo_neigh_setup [ Upstream commit 63134803a6369dcf7dddf7f0d5e37b9566b308d2 ] dev->ndo_neigh_setup() might need some of the values of neigh_parms, so populate them before calling it. Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ce90b0264db..0034b611fa5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1445,16 +1445,18 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, atomic_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); + dev_hold(dev); + p->dev = dev; + write_pnet(&p->net, hold_net(net)); + p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { + release_net(net); + dev_put(dev); kfree(p); return NULL; } - dev_hold(dev); - p->dev = dev; - write_pnet(&p->net, hold_net(net)); - p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; tbl->parms.next = p; -- cgit v1.2.3 From 92986ce4e59db8c4c44caf9633095ab07f1d79f5 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 2 Aug 2013 19:07:39 +0200 Subject: bonding: modify only neigh_parms owned by us [ Upstream commit 9918d5bf329d0dc5bb2d9d293bcb772bdb626e65 ] Otherwise, on neighbour creation, bond_neigh_init() will be called with a foreign netdev. Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f9756961352..666cf3a49b6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3770,11 +3770,17 @@ static int bond_neigh_init(struct neighbour *n) * The bonding ndo_neigh_setup is called at init time beofre any * slave exists. So we must declare proxy setup function which will * be used at run time to resolve the actual slave neigh param setup. + * + * It's also called by master devices (such as vlans) to setup their + * underlying devices. In that case - do nothing, we're already set up from + * our init. 
*/ static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) { - parms->neigh_setup = bond_neigh_init; + /* modify only our neigh_parms */ + if (parms->dev == dev) + parms->neigh_setup = bond_neigh_init; return 0; } -- cgit v1.2.3 From 8b2b5e27cae0bd2572687869111e516ab445be34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 11:18:49 -0700 Subject: fib_trie: remove potential out of bound access [ Upstream commit aab515d7c32a34300312416c50314e755ea6f765 ] AddressSanitizer [1] dynamic checker pointed out a potential out-of-bounds access in leaf_walk_rcu(). We could allocate one more slot in tnode_new() to leave the prefetch() in place, but it does not look worth the pain. Bug added in commit 82cfbb008572b ("[IPV4] fib_trie: iterator recode") [1] : https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel Reported-by: Andrey Konovalov Signed-off-by: Eric Dumazet Cc: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_trie.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 49616fed934..6e8a13da6cb 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -71,7 +71,6 @@ #include #include #include -#include #include #include #include @@ -1761,10 +1760,8 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) if (!c) continue; - if (IS_LEAF(c)) { - prefetch(rcu_dereference_rtnl(p->child[idx])); + if (IS_LEAF(c)) return (struct leaf *) c; - } /* Rescan start scanning in new node */ p = (struct tnode *) c; -- cgit v1.2.3 From 5ddf771720b5761435133058842f274072afa8ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 6 Aug 2013 00:32:05 +0200 Subject: bridge: don't try to update timers in case of broken MLD queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 248ba8ec05a2c3b118c2224e57eb10c128176ab1 ] Currently we are reading an uninitialized value for the max_delay variable when snooping an MLD query message of invalid length and would update our timers with that. Fixing this by simply ignoring such broken MLD queries (just like we do for IGMP already). This is a regression introduced by: "bridge: disable snooping if there is no querier" (b00589af3b04) Reported-by: Paul Bolle Signed-off-by: Linus Lüssing Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d6448e35e02..75a81281c97 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1185,7 +1185,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); if (max_delay) group = &mld->mld_mca; - } else if (skb->len >= sizeof(*mld2q)) { + } else { if (!pskb_may_pull(skb, sizeof(*mld2q))) { err = -EINVAL; goto out; -- cgit v1.2.3 From 16f033319c6805588375afbda88ea5a513393dd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 17:10:15 -0700 Subject: tcp: cubic: fix overflow error in bictcp_update() [ Upstream commit 2ed0edf9090bf4afa2c6fc4f38575a85a80d4b20 ] commit 17a6e9f1aa9 ("tcp_cubic: fix clock dependency") added an overflow error in bictcp_update() in the following code: /* change the unit from HZ to bictcp_HZ */ t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - ca->epoch_start) << BICTCP_HZ) / HZ; Because msecs_to_jiffies() is unsigned long, the compiler does an implicit type promotion. We really want to constrain (tcp_time_stamp - ca->epoch_start) to a signed 32bit value, or else 't' has unexpectedly high values. This bug triggers an increase of retransmit rates ~24 days after boot [1], as the high order bit of tcp_time_stamp flips. [1] for hosts with HZ=1000 Big thanks to Van Jacobson for spotting this problem. Diagnosed-by: Van Jacobson Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Cc: Stephen Hemminger Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_cubic.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index a9077f441cb..b6b591f0a78 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -206,8 +206,8 @@ static u32 cubic_root(u64 a) */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { - u64 offs; - u32 delta, t, bic_target, max_cnt; + u32 delta, bic_target, max_cnt; + u64 offs, t; ca->ack_cnt++; /* count the number of ACKs */ @@ -250,9 +250,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) * if the cwnd < 1 million packets !!! */ + t = (s32)(tcp_time_stamp - ca->epoch_start); + t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - - ca->epoch_start) << BICTCP_HZ) / HZ; + t <<= BICTCP_HZ; + do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; -- cgit v1.2.3 From ca02d414915693909f9d7f455c4598d3f8b514b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 20:05:12 -0700 Subject: tcp: cubic: fix bug in bictcp_acked() [ Upstream commit cd6b423afd3c08b27e1fed52db828ade0addbc6b ] While investigating a strange increase of retransmit rates on hosts ~24 days after boot, Van found hystart was disabled if ca->epoch_start was 0, as the following condition is true when the tcp_time_stamp high-order bit is set: (s32)(tcp_time_stamp - ca->epoch_start) < HZ Quoting Van: At initialization & after every loss ca->epoch_start is set to zero so I believe that the above line will turn off hystart as soon as the 2^31 bit is set in tcp_time_stamp & hystart will stay off for 24 days. I think we've observed that cubic's restart is too aggressive without hystart so this might account for the higher drop rate we observe. 
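The wraparound is easy to check with a small stand-alone program (a hedged illustration; the sample values are made up):

#include <stdio.h>
#include <stdint.h>

#define HZ 1000

int main(void)
{
	uint32_t tcp_time_stamp = 0x80000001u;	/* high bit set, ~24 days of jiffies at HZ=1000 */
	uint32_t epoch_start = 0;		/* cleared at init and after a loss */
	int32_t delta = (int32_t)(tcp_time_stamp - epoch_start);

	/* old check: delta is hugely negative, so it stays below HZ
	 * and every delay sample is discarded */
	printf("old check discards sample: %d\n", delta < HZ);
	/* fixed check additionally requires a non-zero epoch_start */
	printf("new check discards sample: %d\n",
	       epoch_start && (int32_t)(tcp_time_stamp - epoch_start) < HZ);
	return 0;
}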
Diagnosed-by: Van Jacobson Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_cubic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index b6b591f0a78..b6ae92a51f5 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -416,7 +416,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) return; /* Discard delay samples right after fast recovery */ - if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; delay = (rtt_us << 3) / USEC_PER_MSEC; -- cgit v1.2.3 From 24e8ac721e0e13cab478bd190977daac48b6146c Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 7 Aug 2013 02:34:31 +0200 Subject: ipv6: don't stop backtracking in fib6_lookup_1 if subtree does not match [ Upstream commit 3e3be275851bc6fc90bfdcd732cd95563acd982b ] In case a subtree did not match we currently stop backtracking and return NULL (root table from fib_lookup). This could yield invalid routing table lookups when using subtrees. Instead continue to backtrack until a valid subtree or node is found and return this match. Also remove unneeded NULL check. Reported-by: Teco Boot Cc: YOSHIFUJI Hideaki Cc: David Lamparter Cc: Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5fc9c7a68d8..2221ff6a308 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -993,14 +993,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) - fn = fib6_lookup_1(fn->subtree, args + 1); + if (fn->subtree) { + struct fib6_node *sfn; + sfn = fib6_lookup_1(fn->subtree, + args + 1); + if (!sfn) + goto backtrack; + fn = sfn; + } #endif - if (!fn || fn->fn_flags & RTN_RTINFO) + if (fn->fn_flags & RTN_RTINFO) return fn; } } - +#ifdef CONFIG_IPV6_SUBTREES +backtrack: +#endif if (fn->fn_flags & RTN_ROOT) break; -- cgit v1.2.3 From 27c1c98bd3b44b7c5f5c0ecfe1a1ec1240b73829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Tue, 6 Aug 2013 13:45:43 +0300 Subject: ip_gre: fix ipgre_header to return correct offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 77a482bdb2e68d13fae87541b341905ba70d572b ] Fix ipgre_header() (header_ops->create) to return the correct number of bytes pushed. Most callers of dev_hard_header() seem to care only whether it was successful, but af_packet.c uses it as an offset into the skb to copy from userspace only once. In practice this fixes packet socket sendto()/sendmsg() to gre tunnels. Regression introduced in c54419321455631079c7d6e60bc732dd0c5914c5 ("GRE: Refactor GRE tunneling code.") Cc: Pravin B Shelar Signed-off-by: Timo Teräs Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 855004f0832..c52fee0976d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -572,7 +572,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (daddr) memcpy(&iph->daddr, daddr, 4); if (iph->daddr) - return t->hlen; + return t->hlen + sizeof(*iph); return -(t->hlen + sizeof(*iph)); } -- cgit v1.2.3 From 6c1a4bb872100c852b07480396744608278dd9ee Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 9 Aug 2013 11:16:34 -0700 Subject: 8139cp: Fix skb leak in rx_status_loop failure path. [ Upstream commit d06f5187469eee1b2932c02fd093d113cfc60d5e ] Introduced in cf3c4c03060b688cbc389ebc5065ebcce5653e96 ("8139cp: Add dma_mapping_error checking") Signed-off-by: Dave Jones Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/8139cp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 887aebea298..9095ff930f2 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -524,6 +524,7 @@ rx_status_loop: PCI_DMA_FROMDEVICE); if (dma_mapping_error(&cp->pdev->dev, new_mapping)) { dev->stats.rx_dropped++; + kfree_skb(new_skb); goto rx_next; } -- cgit v1.2.3 From 21db4be1321b1fe80a28eb122e459c8ab3c2bd1f Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 8 Aug 2013 15:19:48 -0700 Subject: rtnetlink: Fix inverted check in ndo_dflt_fdb_del() [ Upstream commit 645359930231d5e78fd3296a38b98c1a658a7ade ] Fix inverted check when deleting an fdb entry. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a08bd2b7fe3..373a8e76af0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2142,7 +2142,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, /* If aging addresses are supported device will need to * implement its own handler for this. */ - if (ndm->ndm_state & NUD_PERMANENT) { + if (!(ndm->ndm_state & NUD_PERMANENT)) { pr_info("%s: FDB only supports static addresses\n", dev->name); return -EINVAL; } -- cgit v1.2.3 From e307a8acf8c075261f8110b7088f20d2f7206f56 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 23 Aug 2013 12:44:55 -0700 Subject: genl: Fix genl dumpit() locking. [ Upstream commit 9b96309c5b0b9e466773c07a5bc8b7b68fcf010a ] In the case of a genl family with parallel ops off, the dumpit() callback is expected to run under genl_lock. But commit def3117493eafd9df (genl: Allow concurrent genl callbacks.) changed this behaviour so that only the first dumpit() op was called under the genl lock; for subsequent dumps, only nlk->cb_lock was taken. The following patch fixes it by defining locked dumpit() and done() callbacks which take care of genl locking. Signed-off-by: Pravin B Shelar CC: Jesse Gross CC: Johannes Berg Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/genetlink.c | 51 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1076fe16b12..8ee0b4506be 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -544,6 +544,30 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc; + + genl_lock(); + rc = ops->dumpit(skb, cb); + genl_unlock(); + return rc; +} + +static int genl_lock_done(struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->done) { + genl_lock(); + rc = ops->done(cb); + genl_unlock(); + } + return rc; +} + static int genl_family_rcv_msg(struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh) @@ -572,15 +596,32 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EPERM; if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = ops->dumpit, - .done = ops->done, - }; + int rc; if (ops->dumpit == NULL) return -EOPNOTSUPP; - return netlink_dump_start(net->genl_sock, skb, nlh, &c); + if (!family->parallel_ops) { + struct netlink_dump_control c = { + .data = ops, + .dump = genl_lock_dumpit, + .done = genl_lock_done, + }; + + genl_unlock(); + rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + genl_lock(); + + } else { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + + rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + } + + return rc; } if (ops->doit == NULL) -- cgit v1.2.3 From a8077ef001460c41bc0509b096d3d342002c4d9b Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 23 Aug 2013 12:45:04 -0700 Subject: genl: Hold reference on correct module while netlink-dump. [ Upstream commit 33c6b1f6b154894321f5734e50c66621e9134e7e ] netlink dump operations take module as parameter to hold reference for entire netlink dump duration. Currently it holds ref only on genl module which is not correct when we use ops registered to genl from another module. Following patch adds module pointer to genl_ops so that netlink can hold ref count on it. Signed-off-by: Pravin B Shelar CC: Jesse Gross CC: Johannes Berg Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/genetlink.h | 20 ++++++++++++++++++-- net/netlink/genetlink.c | 20 +++++++++++--------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 93024a47e0e..8e0b6c856a1 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -61,6 +61,7 @@ struct genl_family { struct list_head ops_list; /* private */ struct list_head family_list; /* private */ struct list_head mcast_groups; /* private */ + struct module *module; }; /** @@ -121,9 +122,24 @@ struct genl_ops { struct list_head ops_list; }; -extern int genl_register_family(struct genl_family *family); -extern int genl_register_family_with_ops(struct genl_family *family, +extern int __genl_register_family(struct genl_family *family); + +static inline int genl_register_family(struct genl_family *family) +{ + family->module = THIS_MODULE; + return __genl_register_family(family); +} + +extern int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops); + +static inline int genl_register_family_with_ops(struct genl_family *family, + struct genl_ops *ops, size_t n_ops) +{ + family->module = THIS_MODULE; + return __genl_register_family_with_ops(family, ops, n_ops); +} + extern int genl_unregister_family(struct genl_family *family); extern int genl_register_ops(struct genl_family *, struct genl_ops *ops); extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 8ee0b4506be..393f17eea1a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -364,7 +364,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) EXPORT_SYMBOL(genl_unregister_ops); /** - * genl_register_family - register a generic netlink family + * __genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one @@ -374,7 +374,7 @@ EXPORT_SYMBOL(genl_unregister_ops); * * Return 0 on success or a negative error code. */ -int genl_register_family(struct genl_family *family) +int __genl_register_family(struct genl_family *family) { int err = -EINVAL; @@ -430,10 +430,10 @@ errout_locked: errout: return err; } -EXPORT_SYMBOL(genl_register_family); +EXPORT_SYMBOL(__genl_register_family); /** - * genl_register_family_with_ops - register a generic netlink family + * __genl_register_family_with_ops - register a generic netlink family * @family: generic netlink family * @ops: operations to be registered * @n_ops: number of elements to register @@ -457,12 +457,12 @@ EXPORT_SYMBOL(genl_register_family); * * Return 0 on success or a negative error code. 
*/ -int genl_register_family_with_ops(struct genl_family *family, +int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops) { int err, i; - err = genl_register_family(family); + err = __genl_register_family(family); if (err) return err; @@ -476,7 +476,7 @@ err_out: genl_unregister_family(family); return err; } -EXPORT_SYMBOL(genl_register_family_with_ops); +EXPORT_SYMBOL(__genl_register_family_with_ops); /** * genl_unregister_family - unregister generic netlink family @@ -603,22 +603,24 @@ static int genl_family_rcv_msg(struct genl_family *family, if (!family->parallel_ops) { struct netlink_dump_control c = { + .module = family->module, .data = ops, .dump = genl_lock_dumpit, .done = genl_lock_done, }; genl_unlock(); - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); genl_lock(); } else { struct netlink_dump_control c = { + .module = family->module, .dump = ops->dumpit, .done = ops->done, }; - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); } return rc; -- cgit v1.2.3 From e8d11678770b9bbdcc5c2a9b3a041d136575f322 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 13 Aug 2013 01:41:06 -0700 Subject: ip_tunnel: Do not use inner ip-header-id for tunnel ip-header-id. [ Upstream commit 4221f40513233fa8edeef7fc82e44163fde03b9b ] Using inner-id for tunnel id is not safe in some rare cases. E.g. packets coming from multiple sources entering the same tunnel can have the same id. Therefore, on tunnel packet receive we could have packets from two different streams but with the same source and dst IP and the same ip-id, which could confuse ip packet reassembly. The following patch reverts the optimization from commit 490ab08127 (IP_GRE: Fix IP-Identification.) Signed-off-by: Pravin B Shelar CC: Jarno Rajahalme CC: Ansis Atteka Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 2 +- include/net/ip_tunnels.h | 14 -------------- net/ipv4/ip_tunnel.c | 2 +- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 57325f356d4..054489fdf54 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1090,7 +1090,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, iph->daddr = dst; iph->saddr = fl4.saddr; iph->ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - tunnel_ip_select_ident(skb, old_iph, &rt->dst); + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); nf_reset(skb); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 09b1360e10b..a9942e1faef 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -141,20 +141,6 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, return INET_ECN_encapsulate(tos, inner); } -static inline void tunnel_ip_select_ident(struct sk_buff *skb, - const struct iphdr *old_iph, - struct dst_entry *dst) -{ - struct iphdr *iph = ip_hdr(skb); - - /* Use inner packet iph-id if possible.
*/ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); -} - static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) { int err; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cbfc37f5f05..b7a4c21c06e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -686,7 +686,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; iph->ttl = ttl; - tunnel_ip_select_ident(skb, inner_iph, &rt->dst); + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); iptunnel_xmit(skb, dev); return; -- cgit v1.2.3 From f784dbb9b9b868185b78295b120dbc1f79513e55 Mon Sep 17 00:00:00 2001 From: Asbjoern Sloth Toennesen Date: Mon, 12 Aug 2013 16:30:09 +0000 Subject: rtnetlink: rtnl_bridge_getlink: Call nlmsg_find_attr() with ifinfomsg header [ Upstream commit 3e805ad288c524bb65aad3f1e004402223d3d504 ] Fix the iproute2 command `bridge vlan show`, after switching from rtgenmsg to ifinfomsg. Let's start with a little history: Feb 20: Vlad Yasevich got his VLAN-aware bridge patchset included in the 3.9 merge window. In the kernel commit 6cbdceeb, he added attribute support to bridge GETLINK requests sent with rtgenmsg. Mar 6th: Vlad got this iproute2 reference implementation of the bridge vlan netlink interface accepted (iproute2 9eff0e5c) Apr 25th: iproute2 switched from using rtgenmsg to ifinfomsg (63338dca) http://patchwork.ozlabs.org/patch/239602/ http://marc.info/?t=136680900700007 Apr 28th: Linus released 3.9 Apr 30th: Stephen released iproute2 3.9.0 The `bridge vlan show` command hasn't been working since the switch to ifinfomsg, or in a released version of iproute2, since the kernel side only supports rtgenmsg, which iproute2 switched away from just prior to the iproute2 3.9.0 release. I haven't been able to find any documentation about either rtgenmsg or ifinfomsg, or about which situation calls for which, but kernel commit 88c5b5ce seems to suggest that ifinfomsg should be used. Fixing this in the kernel will break compatibility, but I doubt that anybody has been using it due to this bug in the user space reference implementation, at least not without noticing this bug. That said, the functionality is still fully functional in 3.9 when reverting iproute2 commit 63338dca. This could also be fixed in iproute2, but that's an ugly patch that would reintroduce rtgenmsg in iproute2, and from searching in netdev it seems that rtgenmsg usage is discouraged. I'm assuming that the only reason that Vlad implemented the kernel side to use rtgenmsg was because iproute2 was using it at the time. Signed-off-by: Asbjoern Sloth Toennesen Reviewed-by: Vlad Yasevich Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 373a8e76af0..fd01eca52a1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2374,7 +2374,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) struct nlattr *extfilt; u32 filter_mask = 0; - extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg), IFLA_EXT_MASK); if (extfilt) filter_mask = nla_get_u32(extfilt); -- cgit v1.2.3 From b59bde78dbf049a2671603034562940ac6eb1181 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Aug 2013 15:52:57 +0300 Subject: tun: signedness bug in tun_get_user() [ Upstream commit 15718ea0d844e4816dbd95d57a8a0e3e264ba90e ] The recent fix d9bf5f1309 "tun: compare with 0 instead of total_len" is not totally correct, because "len" and "sizeof()" are of size_t type and can therefore never be less than zero. Signed-off-by: Dan Carpenter Acked-by: Michael S. Tsirkin Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2491eb28a8d..7b54f4f052d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1076,8 +1076,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, u32 rxhash; if (!(tun->flags & TUN_NO_PI)) { - if ((len -= sizeof(pi)) > total_len) + if (len < sizeof(pi)) return -EINVAL; + len -= sizeof(pi); if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi))) return -EFAULT; @@ -1085,8 +1086,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (tun->flags & TUN_VNET_HDR) { - if ((len -= tun->vnet_hdr_sz) > total_len) + if (len < tun->vnet_hdr_sz) return -EINVAL; + len -= tun->vnet_hdr_sz; if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso))) return -EFAULT; -- cgit v1.2.3 From ad558a2970f60e8a52f048c96f0f6ea7dc691fb7 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 16 Aug 2013 13:02:27 +0200 Subject: ipv6: remove max_addresses check from ipv6_create_tempaddr [ Upstream commit 4b08a8f1bd8cb4541c93ec170027b4d0782dab52 ] Because of the max_addresses check, attackers were able to disable privacy extensions on an interface by creating enough autoconfigured addresses. But the check is not actually needed: max_addresses protects the kernel from installing too many ipv6 addresses on an interface and guards addrconf_prefix_rcv against installing further addresses as soon as this limit is reached. We only generate temporary addresses in direct response to a new address showing up. As soon as we have filled up the maximum number of addresses of an interface, we stop installing more addresses and thus also stop generating more temp addresses. Even if the attacker tries to generate a lot of temporary addresses by announcing a prefix and removing it again (lifetime == 0) we won't install more temp addresses, because the temporary addresses do count toward the maximum number of addresses; thus we would stop installing new autoconfigured addresses when the limit is reached. This patch fixes CVE-2013-0343 (but other layer-2 attacks are still possible). Thanks to Ding Tianhong for bringing this topic up again. Signed-off-by: Hannes Frederic Sowa Cc: Ding Tianhong Cc: George Kargiotakis Cc: P J P Cc: YOSHIFUJI Hideaki Acked-by: Ding Tianhong Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fb8c94c4ab8..d3057f9b04a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1124,12 +1124,10 @@ retry: if (ifp->flags & IFA_F_OPTIMISTIC) addr_flags |= IFA_F_OPTIMISTIC; - ift = !max_addresses || - ipv6_count_addresses(idev) < max_addresses ? - ipv6_add_addr(idev, &addr, tmp_plen, - ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, - addr_flags) : NULL; - if (IS_ERR_OR_NULL(ift)) { + ift = ipv6_add_addr(idev, &addr, tmp_plen, + ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, + addr_flags); + if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); -- cgit v1.2.3 From a829a28873ec2f5daedd77ef91e430b260af1521 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 16 Aug 2013 13:30:07 +0200 Subject: ipv6: drop packets with multiple fragmentation headers [ Upstream commit f46078cfcd77fa5165bf849f5e568a7ac5fa569c ] It is not allowed for an ipv6 packet to contain multiple fragmentation headers. So discard packets which were already reassembled by fragmentation logic and send back a parameter problem icmp. The updates for RFC 6980 will come in later, I have to do a bit more research here. Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ipv6.h | 1 + net/ipv6/reassembly.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 850e95bc766..b8b7dc75575 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -101,6 +101,7 @@ struct inet6_skb_parm { #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 #define IP6SKB_ROUTERALERT 8 +#define IP6SKB_FRAGMENTED 16 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 790d9f4b8b0..1aeb473b2cc 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -490,6 +490,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, ipv6_hdr(head)->payload_len = htons(payload_len); ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; + IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) @@ -524,6 +525,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct net *net = dev_net(skb_dst(skb)->dev); int evicted; + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) + goto fail_hdr; + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ @@ -544,6 +548,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; return 1; } -- cgit v1.2.3 From f3f905389f1aaae9e091a28d018c66e08c85eddd Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 16 Aug 2013 19:04:36 +0400 Subject: tcp: set timestamps for restored skb-s [ Upstream commit 7ed5c5ae96d23da22de95e1c7a239537acd378b1 ] When the repair mode is turned off, the write queue seqs are updated so that the whole queue is considered to be 'already sent'. The "when" field must be set for such skbs. It's used in tcp_rearm_rto for example.
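For context, the restore sequence that exercises this path looks roughly like the following from userspace; a minimal sketch assuming the TCP_REPAIR interface from linux/tcp.h (the helper name and error handling are illustrative only, this is not CRIU's actual code):

#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>	/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_SEND_QUEUE */

/* Hypothetical helper: refill the write queue of a socket being
 * restored. While TCP_REPAIR is on, send() only queues the data
 * without transmitting it; that is the tcp_sendmsg() path the patch
 * below stamps with "when".
 */
static void restore_send_queue(int sk, const void *data, size_t len)
{
	int on = 1, off = 0, queue = TCP_SEND_QUEUE;

	setsockopt(sk, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on));
	setsockopt(sk, IPPROTO_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue));
	send(sk, data, len, 0);
	setsockopt(sk, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off));
	/* once repair mode is off, the queued skbs count as already sent */
}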
If the "when" field isn't set, the retransmit timeout can be calculated incorrectly and a tcp connected can stop for two minutes (TCP_RTO_MAX). Acked-by: Pavel Emelyanov Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ab450c099aa..2005561861a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1117,6 +1117,13 @@ new_segment: if (!skb) goto wait_for_memory; + /* + * All packets are restored as if they have + * already been sent. + */ + if (tp->repair) + TCP_SKB_CB(skb)->when = tcp_time_stamp; + /* * Check whether we can use HW checksum. */ -- cgit v1.2.3 From fc26e4cf6f6be400ea7b5442982c58e76b6beda4 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 19 Aug 2013 16:40:22 -0400 Subject: packet: restore packet statistics tp_packets to include drops [ Upstream commit 8bcdeaff5ed544704a9a691d4aef0adb3f9c5b8f ] getsockopt PACKET_STATISTICS returns tp_packets + tp_drops. Commit ee80fbf301 ("packet: account statistics only in tpacket_stats_u") cleaned up the getsockopt PACKET_STATISTICS code. This also changed semantics. Historically, tp_packets included tp_drops on return. The commit removed the line that adds tp_drops into tp_packets. This patch reinstates the old semantics. Signed-off-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 20a1bd0e654..a6895ab597c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3259,9 +3259,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (po->tp_version == TPACKET_V3) { lv = sizeof(struct tpacket_stats_v3); + st.stats3.tp_packets += st.stats3.tp_drops; data = &st.stats3; } else { lv = sizeof(struct tpacket_stats); + st.stats1.tp_packets += st.stats1.tp_drops; data = &st.stats1; } -- cgit v1.2.3 From 9f0bd377e1210501cd11eef80159e5d7f6160fef Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 20 Aug 2013 17:10:18 +0900 Subject: bridge: Use the correct bit length for bitmap functions in the VLAN code [ Upstream commit ef40b7ef181b7b1a24df2ef2d1ef84956bffa635 ] The VLAN code needs to know the length of the per-port VLAN bitmap to perform its most basic operations (retrieving VLAN informations, removing VLANs, forwarding database manipulation, etc). Unfortunately, in the current implementation we are using a macro that indicates the bitmap size in longs in places where the size in bits is expected, which in some cases can cause what appear to be random failures. Use the correct macro. Signed-off-by: Toshiaki Makita Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_fdb.c | 10 +++++----- net/bridge/br_netlink.c | 4 ++-- net/bridge/br_vlan.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ebfa4443c69..84dd783abe5 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -161,7 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) if (!pv) return; - for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) { f = __br_fdb_get(br, br->dev->dev_addr, vid); if (f && f->is_local && !f->dst) fdb_delete(br, f); @@ -725,7 +725,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* VID was specified, so use it. */ err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); goto out; } @@ -734,7 +734,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], * specify a VLAN. To be nice, add/update entry for every * vlan on this port. */ - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); if (err) goto out; @@ -812,7 +812,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], err = __br_fdb_delete(p, addr, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { err = __br_fdb_delete(p, addr, 0); goto out; } @@ -822,7 +822,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], * vlan on this port. */ err = -ENOENT; - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err &= __br_fdb_delete(p, addr, vid); } } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8e3abf56479..f66a03453a8 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -128,7 +128,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, else pv = br_get_vlan_info(br); - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) goto done; af = nla_nest_start(skb, IFLA_AF_SPEC); @@ -136,7 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { vinfo.vid = vid; vinfo.flags = 0; if (vid == pvid) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index bd58b45f5f9..9a9ffe7e401 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -108,7 +108,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) clear_bit(vid, v->vlan_bitmap); v->num_vlans--; - if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) { if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); else @@ -122,7 +122,7 @@ static void __vlan_flush(struct net_port_vlans *v) { smp_wmb(); v->pvid = 0; - bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); + bitmap_zero(v->vlan_bitmap, VLAN_N_VID); if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); else -- cgit v1.2.3 From c06ab09127706af38e9e2869afef82a0f63e5fd7 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 14 Aug 2013 23:47:11 +0200 Subject: net_sched: 
restore "linklayer atm" handling [ Upstream commit 8a8e3d84b1719a56f9151909e80ea6ebc5b8e318 ] commit 56b765b79 ("htb: improved accuracy at high rates") broke the "linklayer atm" handling. tc class add ... htb rate X ceil Y linklayer atm The linklayer setting is implemented by modifying the rate table which is send to the kernel. No direct parameter were transferred to the kernel indicating the linklayer setting. The commit 56b765b79 ("htb: improved accuracy at high rates") removed the use of the rate table system. To keep compatible with older iproute2 utils, this patch detects the linklayer by parsing the rate table. It also supports future versions of iproute2 to send this linklayer parameter to the kernel directly. This is done by using the __reserved field in struct tc_ratespec, to convey the choosen linklayer option, but only using the lower 4 bits of this field. Linklayer detection is limited to speeds below 100Mbit/s, because at high rates the rtab is gets too inaccurate, so bad that several fields contain the same values, this resembling the ATM detect. Fields even start to contain "0" time to send, e.g. at 1000Mbit/s sending a 96 bytes packet cost "0", thus the rtab have been more broken than we first realized. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 9 ++++++++- include/uapi/linux/pkt_sched.h | 10 +++++++++- net/sched/sch_api.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/sched/sch_generic.c | 1 + net/sched/sch_htb.c | 13 +++++++++++++ 5 files changed, 72 insertions(+), 2 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e7f4e21cc3e..63ed1d1dd9e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -682,13 +682,19 @@ struct psched_ratecfg { u64 rate_bps; u32 mult; u16 overhead; + u8 linklayer; u8 shift; }; static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, unsigned int len) { - return ((u64)(len + r->overhead) * r->mult) >> r->shift; + len += r->overhead; + + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) + return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; + + return ((u64)len * r->mult) >> r->shift; } extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf); @@ -699,6 +705,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, memset(res, 0, sizeof(*res)); res->rate = r->rate_bps >> 3; res->overhead = r->overhead; + res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } #endif diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index dbd71b0c7d8..09d62b9228f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -73,9 +73,17 @@ struct tc_estimator { #define TC_H_ROOT (0xFFFFFFFFU) #define TC_H_INGRESS (0xFFFFFFF1U) +/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ +enum tc_link_layer { + TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ + TC_LINKLAYER_ETHERNET, + TC_LINKLAYER_ATM, +}; +#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ + struct tc_ratespec { unsigned char cell_log; - unsigned char __reserved; + __u8 linklayer; /* lower 4 bits */ unsigned short overhead; short cell_align; unsigned short mpu; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 281c1bded1f..51b968d3feb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) return q; } +/* 
The linklayer setting were not transferred from iproute2, in older + * versions, and the rate tables lookup systems have been dropped in + * the kernel. To keep backward compatible with older iproute2 tc + * utils, we detect the linklayer setting by detecting if the rate + * table were modified. + * + * For linklayer ATM table entries, the rate table will be aligned to + * 48 bytes, thus some table entries will contain the same value. The + * mpu (min packet unit) is also encoded into the old rate table, thus + * starting from the mpu, we find low and high table entries for + * mapping this cell. If these entries contain the same value, when + * the rate tables have been modified for linklayer ATM. + * + * This is done by rounding mpu to the nearest 48 bytes cell/entry, + * and then roundup to the next cell, calc the table entry one below, + * and compare. + */ +static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) +{ + int low = roundup(r->mpu, 48); + int high = roundup(low+1, 48); + int cell_low = low >> r->cell_log; + int cell_high = (high >> r->cell_log) - 1; + + /* rtab is too inaccurate at rates > 100Mbit/s */ + if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { + pr_debug("TC linklayer: Giving up ATM detection\n"); + return TC_LINKLAYER_ETHERNET; + } + + if ((cell_high > cell_low) && (cell_high < 256) + && (rtab[cell_low] == rtab[cell_high])) { + pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", + cell_low, cell_high, rtab[cell_high]); + return TC_LINKLAYER_ATM; + } + return TC_LINKLAYER_ETHERNET; +} + static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) @@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta rtab->rate = *r; rtab->refcnt = 1; memcpy(rtab->data, nla_data(tab), 1024); + if (r->linklayer == TC_LINKLAYER_UNAWARE) + r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 20224086cc2..a7f838b45dc 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -908,6 +908,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; r->rate_bps = (u64)conf->rate << 3; + r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; /* * Calibrate mult, shift so that token counting is accurate diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5c9f0b7b210..910667cbc1e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1312,6 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; + struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; @@ -1333,6 +1334,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + /* Keeping backward compatible with rate_table based iproute2 tc */ + if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) { + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); + if (rtab) + qdisc_put_rtab(rtab); + } + if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) { + ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); + if (ctab) + qdisc_put_rtab(ctab); + } + if (!cl) { /* new class */ struct Qdisc *new_q; int prio; -- cgit v1.2.3 From 
40361e8fcc76eb603970d4da02592d4dc4a6f631 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 9 Jul 2013 17:12:49 +0100 Subject: sfc: Fix lookup of default RX MAC filters when steered using ethtool [ Upstream commit f3851b0acc5a75bd33c6d344a2e4f920e1622ff0 ] commit 385904f819e3 ('sfc: Don't use efx_filter_{build,hash,increment}() for default MAC filters') used the wrong name to find the index of default RX MAC filters at insertion/ update time. This could result in memory corruption and would in any case silently fail to update the filter. Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c index 2738b5f1c81..a52046581a6 100644 --- a/drivers/net/ethernet/sfc/filter.c +++ b/drivers/net/ethernet/sfc/filter.c @@ -675,7 +675,7 @@ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, BUILD_BUG_ON(EFX_FILTER_INDEX_UC_DEF != 0); BUILD_BUG_ON(EFX_FILTER_INDEX_MC_DEF != EFX_FILTER_MC_DEF - EFX_FILTER_UC_DEF); - rep_index = spec->type - EFX_FILTER_INDEX_UC_DEF; + rep_index = spec->type - EFX_FILTER_UC_DEF; ins_index = rep_index; spin_lock_bh(&state->lock); -- cgit v1.2.3 From 3d740e64f6e333efb2c539d5ea7ed56a5ba2757d Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Thu, 22 Aug 2013 12:23:41 +0530 Subject: be2net: fix disabling TX in be_close() [ Upstream commit 6e1f99757a2b24b7255263b2240a0eb04215174d ] commit fba875591 ("disable TX in be_close()") disabled TX in be_close() to protect be_xmit() from touching freed up queues in the AER recovery flow. But, TX must be disabled *before* cleaning up TX completions in the close() path, not after. This allows be_tx_compl_clean() to free up all TX-req skbs that were notified to the HW. Signed-off-by: Sathya Perla Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 6e4342695fa..7371626c56a 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2561,8 +2561,8 @@ static int be_close(struct net_device *netdev) /* Wait for all pending tx completions to arrive so that * all tx skbs are freed. */ - be_tx_compl_clean(adapter); netif_tx_disable(netdev); + be_tx_compl_clean(adapter); be_rx_qs_destroy(adapter); -- cgit v1.2.3 From f2e39b5ea18377e079ab44323d9c8653a15fc5d6 Mon Sep 17 00:00:00 2001 From: Rob Gardner Date: Sun, 25 Aug 2013 16:02:23 -0600 Subject: net: usb: Add HP hs2434 device to ZLP exception table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 03803a59e32453ee5737c6096a295f748f03cc49 ] This patch adds another entry (HP hs2434 Mobile Broadband) to the list of exceptional devices that require a zero length packet in order to function properly. This list was added in commit 844e88f0. The hs2434 is manufactured by Sierra Wireless, who also produces the MC7710, which the ZLP exception list was created for in the first place. So hopefully it is just this one producer's devices that will need this workaround. Tested on a DM1-4310NR HP notebook, which does not function without this change. Signed-off-by: Rob Gardner Acked-by: Bjørn Mork Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_mbim.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 872819851ae..25ba7eca9a1 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -400,6 +400,10 @@ static const struct usb_device_id mbim_devs[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68a2, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_zlp, }, + /* HP hs2434 Mobile Broadband Module needs ZLPs */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3f0, 0x4b1d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info_zlp, + }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info, }, -- cgit v1.2.3 From 6f198dcab737db9335f1f4c7d97f801e0d5de186 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Tue, 27 Aug 2013 12:20:40 +0400 Subject: tcp: initialize rcv_tstamp for restored sockets [ Upstream commit c7781a6e3c4a9a17e144ec2db00ebfea327bd627 ] u32 rcv_tstamp; /* timestamp of last received ACK */ Its value is used in tcp_retransmit_timer, which closes the socket if the last ack was received more than TCP_RTO_MAX ago. Currently rcv_tstamp is initialized to zero, and if tcp_retransmit_timer is called before receiving a first ack, the connection is closed. This patch initializes rcv_tstamp to the current timestamp when a socket is restored. Reported-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ec335fabd5c..2c48d51f47a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2808,6 +2808,8 @@ void tcp_connect_init(struct sock *sk) if (likely(!tp->repair)) tp->rcv_nxt = 0; + else + tp->rcv_tstamp = tcp_time_stamp; tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; -- cgit v1.2.3 From 6fe6efd941d49f7169e78b2e60ca2dc6a56ca8b4 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Tue, 27 Aug 2013 12:21:55 +0400 Subject: tcp: don't apply tsoffset if rcv_tsecr is zero [ Upstream commit e3e12028315749b7fa2edbc37328e5847be9ede9 ] The zero value means that tsecr is not valid, so it's a special case. tsoffset is used to customize tcp_time_stamp for one socket; tsoffset is usually zero, and it's used when a socket has been moved from one host to another. Currently this issue affects the logic of tcp_rcv_rtt_measure_ts. Due to an incorrect value of rcv_tsecr, tcp_rcv_rtt_measure_ts sets rto to TCP_RTO_MAX. Reported-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c6225780bd..4b75aad14b0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3598,7 +3598,10 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + if (*ptr) + tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + else + tp->rx_opt.rcv_tsecr = 0; return true; } return false; @@ -3623,7 +3626,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, } tcp_parse_options(skb, &tp->rx_opt, 1, NULL); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; return true; @@ -5376,7 +5379,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, 0, &foc); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; if (th->ack) { -- cgit v1.2.3 From 5612d36ca1438c23280b962a80943d14b2e9f778 Mon Sep 17 00:00:00 2001 From: Chris Clark Date: Tue, 27 Aug 2013 12:02:15 -0600 Subject: ipv4: sendto/hdrincl: don't use destination address found in header [ Upstream commit c27c9322d015dc1d9dfdf31724fca71c0476c4d1 ] ipv4: raw_sendmsg: don't use header's destination address A sendto() regression was bisected and found to start with commit f8126f1d5136be1 (ipv4: Adjust semantics of rt->rt_gateway.) The problem is that it tries to ARP-lookup the constructed packet's destination address rather than the explicitly provided address. Fix this using FLOWI_FLAG_KNOWN_NH so that the given nexthop is used. cf. commit 2ad5b9e4bd314fc685086b99e90e5de3bc59e26b Reported-by: Chris Clark Bisected-by: Chris Clark Tested-by: Chris Clark Suggested-by: Julian Anastasov Signed-off-by: Chris Clark Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/raw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dd44e0ab600..61e60d67adc 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -571,7 +571,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | + (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); if (!inet->hdrincl) { -- cgit v1.2.3 From 4b7ead801d3e174ae14ccaed02773041419ae278 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Sep 2013 13:37:01 +0200 Subject: ipv6: Don't depend on per socket memory for neighbour discovery messages [ Upstream commit 25a6e6b84fba601eff7c28d30da8ad7cfbef0d43 ] Allocating skbs when sending out neighbour discovery messages currently uses sock_alloc_send_skb() based on a per net namespace socket and thus shares a socket wmem buffer space. If a netdevice is temporarily unable to transmit due to carrier loss or for other reasons, the queued up ndisc messages will consume all of the wmem space and will thus prevent any more skbs from being allocated, even for netdevices that are able to transmit packets.
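The fix described next swaps the allocation strategy; a simplified side-by-side sketch follows (the function names here are illustrative only, the real change is in ndisc_alloc_skb() in the diff below):

/* Before: charged against the shared per-netns socket; blocks or
 * fails once sk_wmem_alloc reaches the socket's send buffer limit,
 * so one wedged netdevice can starve all the others.
 */
static struct sk_buff *ndisc_skb_shared_wmem(struct sock *sk, int size)
{
	int err;

	return sock_alloc_send_skb(sk, size, 1, &err);
}

/* After: a plain atomic allocation that ignores the socket's wmem
 * accounting; ownership is then assigned by hand so the IPv6 output
 * path still finds a valid skb->sk.
 */
static struct sk_buff *ndisc_skb_unlimited(struct sock *sk, int size)
{
	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);

	if (skb)
		skb_set_owner_w(skb, sk);
	return skb;
}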
The number of neighbour discovery messages sent is very limited; use of alloc_skb() bypasses the socket wmem buffer size enforcement, while the manual call to skb_set_owner_w() maintains the socket reference needed for the IPv6 output path. This patch was originally posted by Eric Dumazet in a modified form. Signed-off-by: Thomas Graf Cc: Eric Dumazet Cc: Hannes Frederic Sowa Cc: Stephen Warren Cc: Fabio Estevam Tested-by: Fabio Estevam Tested-by: Stephen Warren Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ndisc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ca4ffcc287f..060a0449aca 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -372,14 +372,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, int tlen = dev->needed_tailroom; struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; - int err; - skb = sock_alloc_send_skb(sk, - hlen + sizeof(struct ipv6hdr) + len + tlen, - 1, &err); + skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", - __func__, err); + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", + __func__); return NULL; } @@ -389,6 +386,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); skb_reset_transport_header(skb); + /* Manually assign socket ownership as we avoid calling + * sock_alloc_send_pskb() to bypass wmem buffer limits + */ + skb_set_owner_w(skb, sk); + return skb; } -- cgit v1.2.3 From b70a23ab4ab5a95ab9be1bf77b73c1ad9f4e15a4 Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Tue, 27 Aug 2013 16:41:40 -0700 Subject: tcp: tcp_make_synack() should use sock_wmalloc [ Upstream commit eb8895debe1baba41fcb62c78a16f0c63c21662a ] In commit 90ba9b19 (tcp: tcp_make_synack() can use alloc_skb()), Eric changed the call to sock_wmalloc in tcp_make_synack to alloc_skb. In doing so, the netfilter owner match lost its ability to block the SYNACK packet on outbound listening sockets. Revert the change, restoring the owner match functionality. This closes netfilter bugzilla #847. Signed-off-by: Phil Oester Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2c48d51f47a..0145ce7e609 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2664,7 +2664,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, int tcp_header_size; int mss; - skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; } -- cgit v1.2.3 From 8db07b82b70897d868d864402b43a68da5e0cd59 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Wed, 28 Aug 2013 09:29:58 +0200 Subject: tipc: set sk_err correctly when connection fails [ Upstream commit 2c8d85182348021fc0a1bed193a4be4161dc8364 ] Should a connect fail, whether because the publication/server is unavailable or due to some other error, a positive value will be returned and errno is never set. If the application code checks for an explicit zero return from connect (success) or a negative return (failure), it will not catch the error, and subsequent send() calls will fail as shown from the strace snippet below.
socket(0x1e /* PF_??? */, SOCK_SEQPACKET, 0) = 3 connect(3, {sa_family=0x1e /* AF_??? */, sa_data="\2\1\322\4\0\0\322\4\0\0\0\0\0\0"}, 16) = 111 sendto(3, "test", 4, 0, NULL, 0) = -1 EPIPE (Broken pipe) The reason for this behaviour is that TIPC wrongly inverts error codes set in sk_err. Signed-off-by: Erik Hugne Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 515ce38e4f4..7e26ad416af 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1179,7 +1179,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) /* Accept only ACK or NACK message */ if (unlikely(msg_errcode(msg))) { sock->state = SS_DISCONNECTING; - sk->sk_err = -ECONNREFUSED; + sk->sk_err = ECONNREFUSED; retval = TIPC_OK; break; } @@ -1190,7 +1190,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) res = auto_connect(sock, msg); if (res) { sock->state = SS_DISCONNECTING; - sk->sk_err = res; + sk->sk_err = -res; retval = TIPC_OK; break; } -- cgit v1.2.3 From 56a12acebcbd08342f7287a5870fe7ec2c0de91a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Aug 2013 18:10:43 -0700 Subject: net: revert 8728c544a9c ("net: dev_pick_tx() fix") [ Upstream commit 702821f4ea6f68db18aa1de7d8ed62c6ba586a64 ] commit 8728c544a9cbdc ("net: dev_pick_tx() fix") and commit b6fe83e9525a ("bonding: refine IFF_XMIT_DST_RELEASE capability") are quite incompatible: queue selection is disabled because the skb dst was dropped before entering the bonding device. This causes a major performance regression, mainly because TCP packets for a given flow can be sent to multiple queues. This is particularly visible when using the new FQ packet scheduler with an MQ + FQ setup on the slaves. We can safely revert the first commit now that 416186fbf8c5b ("net: Split core bits of netdev_pick_tx into __netdev_pick_tx") properly caps the queue_index. Reported-by: Xi Wang Diagnosed-by: Xi Wang Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Alexander Duyck Cc: Denys Fedorysychenko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 00ee068efc1..c99cc371bbd 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -345,14 +345,9 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) if (new_index < 0) new_index = skb_tx_hash(dev, skb); - if (queue_index != new_index && sk) { - struct dst_entry *dst = - rcu_dereference_check(sk->sk_dst_cache, 1); - - if (dst && skb_dst(skb) == dst) - sk_tx_queue_set(sk, queue_index); - - } + if (queue_index != new_index && sk && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, queue_index); queue_index = new_index; } -- cgit v1.2.3 From 00897febb83864e2dd388d719b256cb362198e27 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Aug 2013 23:55:05 +0200 Subject: net: bridge: convert MLDv2 Query MRC into msecs_to_jiffies for max_delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2d98c29b6fb3de44d9eaa73c09f9cf7209346383 ] While looking into MLDv1/v2 code, I noticed that the bridging code does not convert its max delay into jiffies for MLDv2 messages as we do in core IPv6's multicast code. RFC3810, 5.1.3.
Maximum Response Code says: The Maximum Response Code field specifies the maximum time allowed before sending a responding Report. The actual time allowed, called the Maximum Response Delay, is represented in units of milliseconds, and is derived from the Maximum Response Code as follows: [...] As we update timers that work with jiffies, we need to convert it. Signed-off-by: Daniel Borkmann Cc: Linus Lüssing Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 75a81281c97..d82058f6fc7 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1193,7 +1193,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; - max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; + + max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL); } if (!group) -- cgit v1.2.3 From 2aae409672a9ec6078702b2fe92bd41ecf05d826 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Fri, 30 Aug 2013 11:18:45 +0200 Subject: ICMPv6: treat dest unreachable codes 5 and 6 as EACCES, not EPROTO [ Upstream commit 61e76b178dbe7145e8d6afa84bb4ccea71918994 ] RFC 4443 has defined two additional codes for ICMPv6 type 1 (destination unreachable) messages: 5 - Source address failed ingress/egress policy 6 - Reject route to destination Now they are treated as protocol error and icmpv6_err_convert() converts them to EPROTO. RFC 4443 says: "Codes 5 and 6 are more informative subsets of code 1." Treat codes 5 and 6 as code 1 (EACCES) Btw, connect() returning -EPROTO confuses firefox, so that fallback to other/IPv4 addresses does not work: https://bugzilla.mozilla.org/show_bug.cgi?id=910773 Signed-off-by: Jiri Bohac Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/icmpv6.h | 2 ++ net/ipv6/icmp.c | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index e0133c73c30..590beda78ea 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -115,6 +115,8 @@ struct icmp6hdr { #define ICMPV6_NOT_NEIGHBOUR 2 #define ICMPV6_ADDR_UNREACH 3 #define ICMPV6_PORT_UNREACH 4 +#define ICMPV6_POLICY_FAIL 5 +#define ICMPV6_REJECT_ROUTE 6 /* * Codes for Time Exceeded diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4ff0a42b8c..70e704d4900 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -931,6 +931,14 @@ static const struct icmp6_err { .err = ECONNREFUSED, .fatal = 1, }, + { /* POLICY_FAIL */ + .err = EACCES, + .fatal = 1, + }, + { /* REJECT_ROUTE */ + .err = EACCES, + .fatal = 1, + }, }; int icmpv6_err_convert(u8 type, u8 code, int *err) @@ -942,7 +950,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) switch (type) { case ICMPV6_DEST_UNREACH: fatal = 1; - if (code <= ICMPV6_PORT_UNREACH) { + if (code < ARRAY_SIZE(tab_unreach)) { *err = tab_unreach[code].err; fatal = tab_unreach[code].fatal; } -- cgit v1.2.3 From 84a38c47c2bbcb361caa1631c92eaf5916e178d6 Mon Sep 17 00:00:00 2001 From: Nithin Sujir Date: Fri, 30 Aug 2013 17:01:36 -0700 Subject: tg3: Don't turn off led on 5719 serdes port 0 [ Upstream commit 989038e217e94161862a959e82f9a1ecf8dda152 ] Turning off the led on port 0 of the 5719 serdes causes all other ports to lose power and stop functioning. Add a tg3_phy_led_bug() function to check for this condition. We use a switch() in tg3_phy_led_bug() for consistency with the tg3_phy_power_bug() function. Signed-off-by: Nithin Nayak Sujir Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a13463e8a2c..0877a052e3e 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -3003,6 +3003,19 @@ static bool tg3_phy_power_bug(struct tg3 *tp) return false; } +static bool tg3_phy_led_bug(struct tg3 *tp) +{ + switch (tg3_asic_rev(tp)) { + case ASIC_REV_5719: + if ((tp->phy_flags & TG3_PHYFLG_MII_SERDES) && + !tp->pci_fn) + return true; + return false; + } + + return false; +} + static void tg3_power_down_phy(struct tg3 *tp, bool do_low_power) { u32 val; @@ -3050,8 +3063,9 @@ static void tg3_power_down_phy(struct tg3 *tp, bool do_low_power) } return; } else if (do_low_power) { - tg3_writephy(tp, MII_TG3_EXT_CTRL, - MII_TG3_EXT_CTRL_FORCE_LED_OFF); + if (!tg3_phy_led_bug(tp)) + tg3_writephy(tp, MII_TG3_EXT_CTRL, + MII_TG3_EXT_CTRL_FORCE_LED_OFF); val = MII_TG3_AUXCTL_PCTL_100TX_LPWR | MII_TG3_AUXCTL_PCTL_SPR_ISOLATE | -- cgit v1.2.3 From bd35c1a7f659dfa01179e456881285a9f057d30c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 2 Sep 2013 16:41:00 +0800 Subject: vhost_net: poll vhost queue after marking DMA is done [ Upstream commit 19c73b3e08d16ee923f3962df4abf6205127896a ] We used to poll the vhost queue before marking DMA as done; this is racy if the vhost thread is woken up before DMA is marked as done, which can result in the signal being missed. Fix this by always marking DMA as done before polling the vhost queue. Signed-off-by: Jason Wang Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 8ca5ac71b84..d6a518ce4d6 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -307,6 +307,11 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) struct vhost_virtqueue *vq = ubufs->vq; int cnt = atomic_read(&ubufs->kref.refcount); + /* set len to mark this desc buffers done DMA */ + vq->heads[ubuf->desc].len = success ? + VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; + vhost_net_ubuf_put(ubufs); + /* * Trigger polling thread if guest stopped submitting new buffers: * in this case, the refcount after decrement will eventually reach 1 @@ -317,10 +322,6 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) */ if (cnt <= 2 || !(cnt % 16)) vhost_poll_queue(&vq->poll); - /* set len to mark this desc buffers done DMA */ - vq->heads[ubuf->desc].len = success ? - VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; - vhost_net_ubuf_put(ubufs); } /* Expects to be always run from workqueue - which acts as -- cgit v1.2.3 From ef1f8bcdc2febd53978905b5b0a5201104cce653 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 3 Sep 2013 02:13:31 +0200 Subject: ipv6: fix null pointer dereference in __ip6addrlbl_add [ Upstream commit 639739b5e609a5074839bb22fc061b37baa06269 ] Commit b67bfe0d42cac56c512dd5da4b1b347a23f4b70a ("hlist: drop the node parameter from iterators") changed the behavior of hlist_for_each_entry_safe to leave the p argument NULL. Fix this up by tracking the last argument. Reported-by: Michele Baldessari Cc: Hideaki YOSHIFUJI Cc: Sasha Levin Signed-off-by: Hannes Frederic Sowa Tested-by: Michele Baldessari Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrlabel.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f083a583a05..b30ad3741b4 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -251,38 +251,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, /* add a label */ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) { + struct hlist_node *n; + struct ip6addrlbl_entry *last = NULL, *p = NULL; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", - __func__, - newp, replace); + ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, + replace); - if (hlist_empty(&ip6addrlbl_table.head)) { - hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); - } else { - struct hlist_node *n; - struct ip6addrlbl_entry *p = NULL; - hlist_for_each_entry_safe(p, n, - &ip6addrlbl_table.head, list) { - if (p->prefixlen == newp->prefixlen && - net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && - p->ifindex == newp->ifindex && - ipv6_addr_equal(&p->prefix, &newp->prefix)) { - if (!replace) { - ret = -EEXIST; - goto out; - } - hlist_replace_rcu(&p->list, &newp->list); - ip6addrlbl_put(p); - goto out; - } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || - (p->prefixlen < newp->prefixlen)) { - hlist_add_before_rcu(&newp->list, &p->list); + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + if (p->prefixlen == newp->prefixlen && + net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && + p->ifindex == newp->ifindex && + ipv6_addr_equal(&p->prefix, &newp->prefix)) { + if (!replace) { + ret = -EEXIST; goto out; } + hlist_replace_rcu(&p->list, &newp->list); + ip6addrlbl_put(p); + goto out; + } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || + (p->prefixlen < newp->prefixlen)) { + hlist_add_before_rcu(&newp->list, &p->list); + goto out; } - hlist_add_after_rcu(&p->list, &newp->list); + last = p; } + if (last) + hlist_add_after_rcu(&last->list, &newp->list); + else + hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); out: if (!ret) ip6addrlbl_table.seq++; -- cgit v1.2.3 From a22eb149b18ed385c72d527c42dc398c97b6387f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 3 Sep 2013 19:29:12 +0200 Subject: net: ipv6: tcp: fix potential use after free in tcp_v6_do_rcv [ Upstream commit 3a1c756590633c0e86df606e5c618c190926a0df ] In tcp_v6_do_rcv() code, when processing pkt options, we solely work on our skb clone opt_skb that we've created earlier before entering tcp_rcv_established() on our way. However, only in condition ... if (np->rxopt.bits.rxtclass) np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ... do we work on skb itself. As we extract all other information out of opt_skb in the ipv6_pktoptions path, this seems wrong, since skb can already have been released by tcp_rcv_established() earlier on. When we try to access it in ipv6_hdr(), we will dereference the freed skb. [ Bug added by commit 4c507d2897bd9b ("net: implement IP_RECVTOS for IP_PKTOPTIONS") ] Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Acked-by: Eric Dumazet Acked-by: Jiri Benc Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a17ed9eaf3..66c718854e5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1426,7 +1426,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); -- cgit v1.2.3 From 4c54b9db01842eb0f4eb54af6949b7606ea39e7a Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 4 Sep 2013 16:21:18 +0200 Subject: net: mvneta: properly disable HW PHY polling and ensure adjust_link() works MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 714086029116b6b0a34e67ba1dd2f0d1cf26770c ] This commit fixes a long-standing bug that has been reported by many users: on some Armada 370 platforms, only the network interface that has been used in U-Boot to tftp the kernel works properly in Linux. The other network interfaces can see a 'link up', but are unable to transmit data. The reports were generally made on the Armada 370-based Mirabox, but have also been given on the Armada 370-RD board. The network MAC in the Armada 370/XP (supported by the mvneta driver in Linux) has a functionality that allows it to continuously poll the PHY and directly update the MAC configuration accordingly (speed, duplex, etc.). The very first versions of the driver submitted for review were using this hardware mechanism, but due to this, the driver was not integrated with the kernel phylib. Following reviews, the driver was changed to use the phylib, and therefore a software based polling. In software based polling, Linux regularly talks to the PHY over the MDIO bus, and sees if the link status has changed. If that is the case, then the adjust_link() callback of the driver is called to update the MAC configuration accordingly. However, it turns out that the adjust_link() callback was not configuring the hardware in a completely correct way: while it was setting the speed and duplex bits correctly, it wasn't telling the hardware to actually take into account those bits rather than what the hardware-based PHY polling mechanism has concluded. So, in fact the adjust_link() callback was basically a no-op. However, the network happened to be working on the network interfaces used by U-Boot for tftp on Armada 370 platforms because the hardware PHY polling was enabled by the bootloader, and left enabled by Linux. However, the second network interface not used for tftp (or both network interfaces if the kernel is loaded from USB, NAND or SD card) didn't have the hardware PHY polling enabled. This patch fixes this situation by: (1) Making sure that the hardware PHY polling is disabled by clearing the MVNETA_PHY_POLLING_ENABLE bit in the MVNETA_UNIT_CONTROL register in the driver ->probe() function. (2) Making sure that the duplex and speed selections made by the adjust_link() callback are taken into account by clearing the MVNETA_GMAC_AN_SPEED_EN and MVNETA_GMAC_AN_DUPLEX_EN bits in the MVNETA_GMAC_AUTONEG_CONFIG register. This patch has been tested on the Armada 370 Mirabox, and now both network interfaces are usable after boot.
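For readers who haven't worked with phylib, the adjust_link() callback discussed above has roughly the following shape; a schematic sketch with hypothetical names (struct example_port and example_mac_config() are made up for illustration), not mvneta's actual implementation:

/* Schematic phylib link-change hook. */
struct example_port {
	struct phy_device *phy_dev;
	int link;
	int speed;
	int duplex;
};

static void example_mac_config(struct example_port *pp, int speed, int duplex)
{
	/* write speed/duplex into the MAC's configuration registers */
}

static void example_adjust_link(struct net_device *ndev)
{
	struct example_port *pp = netdev_priv(ndev);
	struct phy_device *phydev = pp->phy_dev;

	if (phydev->link && (pp->speed != phydev->speed ||
			     pp->duplex != phydev->duplex)) {
		/* Push phylib's result into the MAC registers. As the
		 * fix above shows, the MAC must also be told to honour
		 * these bits instead of its own hardware PHY polling.
		 */
		example_mac_config(pp, phydev->speed, phydev->duplex);
		pp->speed = phydev->speed;
		pp->duplex = phydev->duplex;
	}
	pp->link = phydev->link;
}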
[ Problem introduced by commit c5aff18 ("net: mvneta: driver for
  Marvell Armada 370/XP network unit") ]

Signed-off-by: Thomas Petazzoni 
Cc: Willy Tarreau 
Cc: Jochen De Smet 
Cc: Peter Sanford 
Cc: Ethan Tuttle 
Cc: Chény Yves-Gael 
Cc: Ryan Press 
Cc: Simon Guinot 
Cc: vdonnefort@lacie.com 
Cc: stable@vger.kernel.org 
Acked-by: Jason Cooper 
Tested-by: Vincent Donnefort 
Tested-by: Yves-Gael Cheny 
Tested-by: Gregory CLEMENT 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/marvell/mvneta.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index c9667855523..254f255204f 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -136,7 +136,9 @@
 #define MVNETA_GMAC_FORCE_LINK_PASS          BIT(1)
 #define MVNETA_GMAC_CONFIG_MII_SPEED         BIT(5)
 #define MVNETA_GMAC_CONFIG_GMII_SPEED        BIT(6)
+#define MVNETA_GMAC_AN_SPEED_EN              BIT(7)
 #define MVNETA_GMAC_CONFIG_FULL_DUPLEX       BIT(12)
+#define MVNETA_GMAC_AN_DUPLEX_EN             BIT(13)
 #define MVNETA_MIB_COUNTERS_BASE             0x3080
 #define MVNETA_MIB_LATE_COLLISION            0x7c
 #define MVNETA_DA_FILT_SPEC_MCAST            0x3400
@@ -911,6 +913,13 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	/* Assign port SDMA configuration */
 	mvreg_write(pp, MVNETA_SDMA_CONFIG, val);
 
+	/* Disable PHY polling in hardware, since we're using the
+	 * kernel phylib to do this.
+	 */
+	val = mvreg_read(pp, MVNETA_UNIT_CONTROL);
+	val &= ~MVNETA_PHY_POLLING_ENABLE;
+	mvreg_write(pp, MVNETA_UNIT_CONTROL, val);
+
 	mvneta_set_ucast_table(pp, -1);
 	mvneta_set_special_mcast_table(pp, -1);
 	mvneta_set_other_mcast_table(pp, -1);
@@ -2288,7 +2297,9 @@ static void mvneta_adjust_link(struct net_device *ndev)
 			val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
 			val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED |
 				 MVNETA_GMAC_CONFIG_GMII_SPEED |
-				 MVNETA_GMAC_CONFIG_FULL_DUPLEX);
+				 MVNETA_GMAC_CONFIG_FULL_DUPLEX |
+				 MVNETA_GMAC_AN_SPEED_EN |
+				 MVNETA_GMAC_AN_DUPLEX_EN);
 
 			if (phydev->duplex)
 				val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX;
-- 
cgit v1.2.3


From 751190a69d7c885b98716e2aca339f6c4b988704 Mon Sep 17 00:00:00 2001
From: John Haxby 
Date: Wed, 14 Aug 2013 16:23:18 +0100
Subject: crypto: xor - Check for osxsave as well as avx in crypto/xor

commit edb6f29464afc65fc73767540b854abf63ae7144 upstream.

This affects xen pv guests with sufficiently old versions of xen and
sufficiently new hardware. On such a system, a guest with a btrfs root
won't even boot. (A short note on why both checks are needed follows
the patch.)

Signed-off-by: John Haxby 
Signed-off-by: Herbert Xu 
Reported-by: Michael Marineau 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/include/asm/xor_avx.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 7ea79c5fa1f..492b29802f5 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -167,12 +167,12 @@ static struct xor_block_template xor_block_avx = {
 
 #define AVX_XOR_SPEED \
 do { \
-	if (cpu_has_avx) \
+	if (cpu_has_avx && cpu_has_osxsave) \
 		xor_speed(&xor_block_avx); \
 } while (0)
 
 #define AVX_SELECT(FASTEST) \
-	(cpu_has_avx ? &xor_block_avx : FASTEST)
+	(cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST)
 
 #else
-- 
cgit v1.2.3
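Why the extra osxsave check above is needed: the AVX CPUID flag only
says the CPU implements AVX; the YMM registers are usable only if the
OS has also enabled the extended state (CR4.OSXSAVE, mirrored in the
OSXSAVE CPUID bit). Old Xen hypervisors do not set it for PV guests, so
any VEX-encoded instruction, including the AVX XOR loop used for RAID
checksumming, faults with an invalid-opcode exception. Hence the gate
must test both bits:

	/* Illustrative restatement of the change: pick the AVX XOR
	 * implementation only when the CPU has AVX *and* the OS has
	 * enabled the AVX state via CR4.OSXSAVE.
	 */
	if (cpu_has_avx && cpu_has_osxsave)
		xor_speed(&xor_block_avx);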
From 70aa7b8f105b0b5bb82d2082a230efb7f34645a0 Mon Sep 17 00:00:00 2001
From: Sangjung Woo 
Date: Wed, 11 Sep 2013 14:24:21 -0700
Subject: drivers/rtc/rtc-max77686.c: Fix wrong register

commit 1748cbf7f7c464593232cde914f5a103181a83b5 upstream.

Fix a read of the wrong register when checking whether the RTC timer
has reached the alarm time.

Signed-off-by: Sangjung Woo 
Signed-off-by: Myungjoo Ham 
Reviewed-by: Jonghwa Lee 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Jonghwan Choi 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/rtc/rtc-max77686.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c
index 771812d62e6..3bb9401f1ca 100644
--- a/drivers/rtc/rtc-max77686.c
+++ b/drivers/rtc/rtc-max77686.c
@@ -240,9 +240,9 @@ static int max77686_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	}
 
 	alrm->pending = 0;
-	ret = regmap_read(info->max77686->regmap, MAX77686_REG_STATUS1, &val);
+	ret = regmap_read(info->max77686->regmap, MAX77686_REG_STATUS2, &val);
 	if (ret < 0) {
-		dev_err(info->dev, "%s:%d fail to read status1 reg(%d)\n",
+		dev_err(info->dev, "%s:%d fail to read status2 reg(%d)\n",
 			__func__, __LINE__, ret);
 		goto out;
 	}
-- 
cgit v1.2.3


From 7bf0f5bd9213cd0d75383ec36d3aae5f08e185c4 Mon Sep 17 00:00:00 2001
From: Bing Zhao 
Date: Mon, 19 Aug 2013 16:10:21 -0700
Subject: mwifiex: do not create AP and P2P interfaces upon driver loading

commit 1211c961170cedb21c30d5bb7e2033c8720b38db upstream.

Bug 60747 - 1286:2044 [Microsoft Surface Pro] Marvell 88W8797 wifi
show 3 interface under network
https://bugzilla.kernel.org/show_bug.cgi?id=60747

This issue was also reported previously by OLPC and some folks from
the community.

There are 3 network interfaces with different types being created when
the mwifiex driver is loaded:

1. mlan0 (infra. STA)
2. uap0 (AP)
3. p2p0 (P2P_CLIENT)

The Network Manager attempts to use all 3 interfaces above without
filtering the managed interface type. As a result, 3 identical
interfaces are displayed under the network manager. If the user happens
to click on an entry whose underlying interface is uap0 or p2p0, the
association will fail.

Work around it by removing the creation of AP and P2P interfaces at
driver loading time. These interfaces can be added with 'iw' or other
applications manually when they are needed (an example follows this
patch).

Signed-off-by: Bing Zhao 
Signed-off-by: Avinash Patil 
Signed-off-by: John W. Linville 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/wireless/mwifiex/main.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c
index 2eb88ea9acf..c4a2e775fe1 100644
--- a/drivers/net/wireless/mwifiex/main.c
+++ b/drivers/net/wireless/mwifiex/main.c
@@ -363,20 +363,6 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context)
 		dev_err(adapter->dev, "cannot create default STA interface\n");
 		goto err_add_intf;
 	}
-
-	/* Create AP interface by default */
-	if (!mwifiex_add_virtual_intf(adapter->wiphy, "uap%d",
-				      NL80211_IFTYPE_AP, NULL, NULL)) {
-		dev_err(adapter->dev, "cannot create default AP interface\n");
-		goto err_add_intf;
-	}
-
-	/* Create P2P interface by default */
-	if (!mwifiex_add_virtual_intf(adapter->wiphy, "p2p%d",
-				      NL80211_IFTYPE_P2P_CLIENT, NULL, NULL)) {
-		dev_err(adapter->dev, "cannot create default P2P interface\n");
-		goto err_add_intf;
-	}
 	rtnl_unlock();
 
 	mwifiex_drv_get_driver_version(adapter, fmt, sizeof(fmt) - 1);
-- 
cgit v1.2.3
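As the mwifiex message above notes, the removed AP and P2P interfaces
can still be created from userspace when actually wanted, for example
with iw (the phy and interface names here are illustrative, and the
p2p-client type requires a reasonably recent iw):

	# add an AP-type interface on the wiphy behind mlan0
	iw phy phy0 interface add uap0 type __ap
	# add a P2P client interface
	iw phy phy0 interface add p2p0 type p2p-client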
From 9cfceacb4983272c1f0a6926aec2b127143df692 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD 
Date: Sun, 26 May 2013 16:55:59 +0800
Subject: ARM: at91: dt: sam9260: add i2c gpio pinctrl

commit f89ae61bd74ae195c464bdd97a134e30908884d5 upstream.

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD 
Signed-off-by: Nicolas Ferre 
Cc: Boris BREZILLON 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm/boot/dts/at91sam9260.dtsi | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi
index 84c4bef2d72..43a18f74cdd 100644
--- a/arch/arm/boot/dts/at91sam9260.dtsi
+++ b/arch/arm/boot/dts/at91sam9260.dtsi
@@ -340,6 +340,14 @@
 			};
 		};
 
+		i2c_gpio0 {
+			pinctrl_i2c_gpio0: i2c_gpio0-0 {
+				atmel,pins =
+					<0 23 0x0 0x3	/* PA23 gpio I2C_SDA pin */
+					 0 24 0x0 0x3>;	/* PA24 gpio I2C_SCL pin */
+			};
+		};
+
 		pioA: gpio@fffff400 {
 			compatible = "atmel,at91rm9200-gpio";
 			reg = <0xfffff400 0x200>;
@@ -592,6 +600,8 @@
 		i2c-gpio,delay-us = <2>;	/* ~100 kHz */
 		#address-cells = <1>;
 		#size-cells = <0>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c_gpio0>;
 		status = "disabled";
 	};
 };
-- 
cgit v1.2.3


From 410dbb746130bc052bb3c7a337a86252874adbdf Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman 
Date: Sat, 14 Sep 2013 06:55:12 -0700
Subject: Linux 3.10.12

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 595076dc14a..afe001e3b2d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 11
+SUBLEVEL = 12
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
-- 
cgit v1.2.3