usb: dwc2: host: avoid usage of dma_alloc_coherent with irqs disabled

Use streaming DMA mappings to handle cache coherency of the frame list and
the descriptor list. Caches are always synced before the controller or the
CPU accesses these buffers: dma_sync_single_for_device() before the
controller reads a buffer, dma_sync_single_for_cpu() before the CPU does.
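
For reference, the streaming DMA pattern applied by this patch looks
roughly as follows. This is a minimal sketch, not code from the driver:
the example_* names, 'dev' and 'buf_sz' are illustrative, and it maps the
buffer DMA_BIDIRECTIONAL for simplicity, whereas the patch uses matched
DMA_TO_DEVICE/DMA_FROM_DEVICE directions per access.

#include <linux/dma-mapping.h>
#include <linux/slab.h>

static void *buf;
static dma_addr_t buf_dma;

static int example_map(struct device *dev, size_t buf_sz, gfp_t flags)
{
	/*
	 * kzalloc() is safe with IRQs disabled (GFP_ATOMIC), unlike
	 * dma_alloc_coherent() on some platforms; GFP_DMA keeps the
	 * buffer in memory the controller can address.
	 */
	buf = kzalloc(buf_sz, flags | GFP_DMA);
	if (!buf)
		return -ENOMEM;

	buf_dma = dma_map_single(dev, buf, buf_sz, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, buf_dma)) {
		kfree(buf);
		return -ENOMEM;
	}
	return 0;
}

static void example_access(struct device *dev, size_t buf_sz)
{
	/* Take ownership back before the CPU reads or writes the buffer. */
	dma_sync_single_for_cpu(dev, buf_dma, buf_sz, DMA_BIDIRECTIONAL);

	/* ... CPU reads/updates buf here ... */

	/*
	 * Return ownership to the device, flushing CPU writes. A
	 * sub-range of the mapping may be synced instead, e.g. one
	 * descriptor at buf_dma + idx * desc_sz, length desc_sz.
	 */
	dma_sync_single_for_device(dev, buf_dma, buf_sz, DMA_BIDIRECTIONAL);
}

static void example_unmap(struct device *dev, size_t buf_sz)
{
	dma_unmap_single(dev, buf_dma, buf_sz, DMA_BIDIRECTIONAL);
	kfree(buf);
}

The driver follows this pattern for the frame list (dwc2_frame_list_alloc/
dwc2_frame_list_free) and the descriptor lists (dwc2_desc_list_alloc/
dwc2_desc_list_free), syncing individual descriptors as they are updated.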

Acked-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Gregory Herrero <gregory.herrero@intel.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c
index f98c7e91..85d7816 100644
--- a/drivers/usb/dwc2/hcd_ddma.c
+++ b/drivers/usb/dwc2/hcd_ddma.c
@@ -87,22 +87,23 @@
 static int dwc2_desc_list_alloc(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh,
 				gfp_t flags)
 {
-	qh->desc_list = dma_alloc_coherent(hsotg->dev,
-				sizeof(struct dwc2_hcd_dma_desc) *
-				dwc2_max_desc_num(qh), &qh->desc_list_dma,
-				flags);
+	qh->desc_list_sz = sizeof(struct dwc2_hcd_dma_desc) *
+						dwc2_max_desc_num(qh);
 
+	qh->desc_list = kzalloc(qh->desc_list_sz, flags | GFP_DMA);
 	if (!qh->desc_list)
 		return -ENOMEM;
 
-	memset(qh->desc_list, 0,
-	       sizeof(struct dwc2_hcd_dma_desc) * dwc2_max_desc_num(qh));
+	qh->desc_list_dma = dma_map_single(hsotg->dev, qh->desc_list,
+					   qh->desc_list_sz,
+					   DMA_TO_DEVICE);
 
 	qh->n_bytes = kzalloc(sizeof(u32) * dwc2_max_desc_num(qh), flags);
 	if (!qh->n_bytes) {
-		dma_free_coherent(hsotg->dev, sizeof(struct dwc2_hcd_dma_desc)
-				  * dwc2_max_desc_num(qh), qh->desc_list,
-				  qh->desc_list_dma);
+		dma_unmap_single(hsotg->dev, qh->desc_list_dma,
+				 qh->desc_list_sz,
+				 DMA_FROM_DEVICE);
+		kfree(qh->desc_list);
 		qh->desc_list = NULL;
 		return -ENOMEM;
 	}
@@ -113,9 +114,9 @@
 static void dwc2_desc_list_free(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 {
 	if (qh->desc_list) {
-		dma_free_coherent(hsotg->dev, sizeof(struct dwc2_hcd_dma_desc)
-				  * dwc2_max_desc_num(qh), qh->desc_list,
-				  qh->desc_list_dma);
+		dma_unmap_single(hsotg->dev, qh->desc_list_dma,
+				 qh->desc_list_sz, DMA_FROM_DEVICE);
+		kfree(qh->desc_list);
 		qh->desc_list = NULL;
 	}
 
@@ -128,21 +129,20 @@
 	if (hsotg->frame_list)
 		return 0;
 
-	hsotg->frame_list = dma_alloc_coherent(hsotg->dev,
-					       4 * FRLISTEN_64_SIZE,
-					       &hsotg->frame_list_dma,
-					       mem_flags);
+	hsotg->frame_list_sz = 4 * FRLISTEN_64_SIZE;
+	hsotg->frame_list = kzalloc(hsotg->frame_list_sz, GFP_ATOMIC | GFP_DMA);
 	if (!hsotg->frame_list)
 		return -ENOMEM;
 
-	memset(hsotg->frame_list, 0, 4 * FRLISTEN_64_SIZE);
+	hsotg->frame_list_dma = dma_map_single(hsotg->dev, hsotg->frame_list,
+					       hsotg->frame_list_sz,
+					       DMA_TO_DEVICE);
+
 	return 0;
 }
 
 static void dwc2_frame_list_free(struct dwc2_hsotg *hsotg)
 {
-	u32 *frame_list;
-	dma_addr_t frame_list_dma;
 	unsigned long flags;
 
 	spin_lock_irqsave(&hsotg->lock, flags);
@@ -152,14 +152,14 @@
 		return;
 	}
 
-	frame_list = hsotg->frame_list;
-	frame_list_dma = hsotg->frame_list_dma;
+	dma_unmap_single(hsotg->dev, hsotg->frame_list_dma,
+			 hsotg->frame_list_sz, DMA_FROM_DEVICE);
+
+	kfree(hsotg->frame_list);
 	hsotg->frame_list = NULL;
 
 	spin_unlock_irqrestore(&hsotg->lock, flags);
 
-	dma_free_coherent(hsotg->dev, 4 * FRLISTEN_64_SIZE, frame_list,
-			  frame_list_dma);
 }
 
 static void dwc2_per_sched_enable(struct dwc2_hsotg *hsotg, u32 fr_list_en)
@@ -249,6 +249,15 @@
 		j = (j + inc) & (FRLISTEN_64_SIZE - 1);
 	} while (j != i);
 
+	/*
+	 * Sync frame list since controller will access it if periodic
+	 * channel is currently enabled.
+	 */
+	dma_sync_single_for_device(hsotg->dev,
+				   hsotg->frame_list_dma,
+				   hsotg->frame_list_sz,
+				   DMA_TO_DEVICE);
+
 	if (!enable)
 		return;
 
@@ -541,6 +550,11 @@
 		dma_desc->status |= HOST_DMA_IOC;
 #endif
 
+	dma_sync_single_for_device(hsotg->dev,
+			qh->desc_list_dma +
+			(idx * sizeof(struct dwc2_hcd_dma_desc)),
+			sizeof(struct dwc2_hcd_dma_desc),
+			DMA_TO_DEVICE);
 }
 
 static void dwc2_init_isoc_dma_desc(struct dwc2_hsotg *hsotg,
@@ -610,6 +624,11 @@
 	if (qh->ntd == ntd_max) {
 		idx = dwc2_desclist_idx_dec(qh->td_last, inc, qh->dev_speed);
 		qh->desc_list[idx].status |= HOST_DMA_IOC;
+		dma_sync_single_for_device(hsotg->dev,
+					   qh->desc_list_dma + (idx *
+					   sizeof(struct dwc2_hcd_dma_desc)),
+					   sizeof(struct dwc2_hcd_dma_desc),
+					   DMA_TO_DEVICE);
 	}
 #else
 	/*
@@ -639,6 +658,11 @@
 		idx = dwc2_desclist_idx_dec(qh->td_last, inc, qh->dev_speed);
 
 	qh->desc_list[idx].status |= HOST_DMA_IOC;
+	dma_sync_single_for_device(hsotg->dev,
+				   qh->desc_list_dma +
+				   (idx * sizeof(struct dwc2_hcd_dma_desc)),
+				   sizeof(struct dwc2_hcd_dma_desc),
+				   DMA_TO_DEVICE);
 #endif
 }
 
@@ -676,6 +700,12 @@
 
 	dma_desc->buf = (u32)chan->xfer_dma;
 
+	dma_sync_single_for_device(hsotg->dev,
+				   qh->desc_list_dma +
+				   (n_desc * sizeof(struct dwc2_hcd_dma_desc)),
+				   sizeof(struct dwc2_hcd_dma_desc),
+				   DMA_TO_DEVICE);
+
 	/*
 	 * Last (or only) descriptor of IN transfer with actual size less
 	 * than MaxPacket
@@ -726,6 +756,12 @@
 					 "set A bit in desc %d (%p)\n",
 					 n_desc - 1,
 					 &qh->desc_list[n_desc - 1]);
+				dma_sync_single_for_device(hsotg->dev,
+					qh->desc_list_dma +
+					((n_desc - 1) *
+					sizeof(struct dwc2_hcd_dma_desc)),
+					sizeof(struct dwc2_hcd_dma_desc),
+					DMA_TO_DEVICE);
 			}
 			dwc2_fill_host_dma_desc(hsotg, chan, qtd, qh, n_desc);
 			dev_vdbg(hsotg->dev,
@@ -751,10 +787,19 @@
 				HOST_DMA_IOC | HOST_DMA_EOL | HOST_DMA_A;
 		dev_vdbg(hsotg->dev, "set IOC/EOL/A bits in desc %d (%p)\n",
 			 n_desc - 1, &qh->desc_list[n_desc - 1]);
+		dma_sync_single_for_device(hsotg->dev,
+					   qh->desc_list_dma + (n_desc - 1) *
+					   sizeof(struct dwc2_hcd_dma_desc),
+					   sizeof(struct dwc2_hcd_dma_desc),
+					   DMA_TO_DEVICE);
 		if (n_desc > 1) {
 			qh->desc_list[0].status |= HOST_DMA_A;
 			dev_vdbg(hsotg->dev, "set A bit in desc 0 (%p)\n",
 				 &qh->desc_list[0]);
+			dma_sync_single_for_device(hsotg->dev,
+					qh->desc_list_dma,
+					sizeof(struct dwc2_hcd_dma_desc),
+					DMA_TO_DEVICE);
 		}
 		chan->ntd = n_desc;
 	}
@@ -829,7 +874,7 @@
 					struct dwc2_qtd *qtd,
 					struct dwc2_qh *qh, u16 idx)
 {
-	struct dwc2_hcd_dma_desc *dma_desc = &qh->desc_list[idx];
+	struct dwc2_hcd_dma_desc *dma_desc;
 	struct dwc2_hcd_iso_packet_desc *frame_desc;
 	u16 remain = 0;
 	int rc = 0;
@@ -837,6 +882,13 @@
 	if (!qtd->urb)
 		return -EINVAL;
 
+	dma_sync_single_for_cpu(hsotg->dev, qh->desc_list_dma + (idx *
+				sizeof(struct dwc2_hcd_dma_desc)),
+				sizeof(struct dwc2_hcd_dma_desc),
+				DMA_FROM_DEVICE);
+
+	dma_desc = &qh->desc_list[idx];
+
 	frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last];
 	dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset);
 	if (chan->ep_is_in)
@@ -1092,6 +1144,12 @@
 	if (!urb)
 		return -EINVAL;
 
+	dma_sync_single_for_cpu(hsotg->dev,
+				qh->desc_list_dma + (desc_num *
+				sizeof(struct dwc2_hcd_dma_desc)),
+				sizeof(struct dwc2_hcd_dma_desc),
+				DMA_FROM_DEVICE);
+
 	dma_desc = &qh->desc_list[desc_num];
 	n_bytes = qh->n_bytes[desc_num];
 	dev_vdbg(hsotg->dev,