[S390] fix tlb flushing for page table pages

Git commit 36409f6353fc2d7b6516e631415f938eadd92ffa "use generic RCU
page-table freeing code" introduced a tlb flushing bug. Partially revert
the above git commit and go back to s390 specific page table flush code.

For s390 the TLB can contain three types of entries, "normal" TLB
page-table entries, TLB combined region-and-segment-table (CRST) entries
and real-space entries. Linux does not use real-space entries which
leaves normal TLB entries and CRST entries. The CRST entries are
intermediate steps in the page-table translation called translation paths.
For example a 4K page access in a three-level page table setup will
create two CRST TLB entries and one page-table TLB entry. The advantage
of that approach is that a page access next to the previous one can reuse
the CRST entries and needs just a single read from memory to create the
page-table TLB entry. The disadvantage is that the TLB flushing rules are
more complicated, before any page-table may be freed the TLB needs to be
flushed.

In short: the generic RCU page-table freeing code is incorrect for the
CRST entries, in particular the check for mm_users < 2 is troublesome.

This is applicable to 3.0+ kernels.

Cc: <stable@vger.kernel.org>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index c687a2c..775a5ee 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -30,14 +30,10 @@
 
 struct mmu_gather {
 	struct mm_struct *mm;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	struct mmu_table_batch *batch;
-#endif
 	unsigned int fullmm;
-	unsigned int need_flush;
 };
 
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 struct mmu_table_batch {
 	struct rcu_head		rcu;
 	unsigned int		nr;
@@ -49,7 +45,6 @@
 
 extern void tlb_table_flush(struct mmu_gather *tlb);
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-#endif
 
 static inline void tlb_gather_mmu(struct mmu_gather *tlb,
 				  struct mm_struct *mm,
@@ -57,29 +52,20 @@
 {
 	tlb->mm = mm;
 	tlb->fullmm = full_mm_flush;
-	tlb->need_flush = 0;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb->batch = NULL;
-#endif
 	if (tlb->fullmm)
 		__tlb_flush_mm(mm);
 }
 
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
-	if (!tlb->need_flush)
-		return;
-	tlb->need_flush = 0;
-	__tlb_flush_mm(tlb->mm);
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
-#endif
 }
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
 				  unsigned long start, unsigned long end)
 {
-	tlb_flush_mmu(tlb);
+	tlb_table_flush(tlb);
 }
 
 /*
@@ -105,10 +91,8 @@
 static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				unsigned long address)
 {
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	if (!tlb->fullmm)
 		return page_table_free_rcu(tlb, (unsigned long *) pte);
-#endif
 	page_table_free(tlb->mm, (unsigned long *) pte);
 }
 
@@ -125,10 +109,8 @@
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 31))
 		return;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	if (!tlb->fullmm)
 		return tlb_remove_table(tlb, pmd);
-#endif
 	crst_table_free(tlb->mm, (unsigned long *) pmd);
 #endif
 }
@@ -146,10 +128,8 @@
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 42))
 		return;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	if (!tlb->fullmm)
 		return tlb_remove_table(tlb, pud);
-#endif
 	crst_table_free(tlb->mm, (unsigned long *) pud);
 #endif
 }