aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sairam Venugopal <vsairam@vmware.com> 2015-10-26 16:48:41 -0700
committer: Gurucharan Shetty <gshetty@nicira.com> 2015-10-27 13:49:03 -0700
commit: a422ea1d6011048fa722b199437801338a9560cb (patch)
tree: 0ed10390070332faf0d5363dfa88278c41b9f38c
parent: cab433d0f3d96fd143e76b6fea21449ab7b89240 (diff)
datapath-windows: STT - Enable support for TCP Segmentation offloads [master]
Add support to the STT Encap and Decap functions to reassemble the packet fragments. Also add support to offload the packet to NDIS.

Signed-off-by: Sairam Venugopal <vsairam@vmware.com>
Acked-by: Nithin Raju <nithin@vmware.com>
Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
-rw-r--r-- datapath-windows/ovsext/Actions.c 40
-rw-r--r-- datapath-windows/ovsext/Stt.c 398
2 files changed, 329 insertions, 109 deletions
diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index b4644a7e3..ce592b3c7 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -594,7 +594,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
InitializeListHead(&missedPackets);
status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS, vport,
&key,ovsFwdCtx->curNbl,
- ovsFwdCtx->tunnelRxNic != NULL, &ovsFwdCtx->layers,
+ FALSE, &ovsFwdCtx->layers,
ovsFwdCtx->switchContext, &missedPackets, &num);
if (num) {
OvsQueuePackets(&missedPackets, num);
@@ -709,6 +709,7 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
NDIS_STATUS status = NDIS_STATUS_SUCCESS;
PNET_BUFFER_LIST newNbl = NULL;
POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
+ PCWSTR dropReason = L"OVS-dropped due to new decap packet";
if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
!= NDIS_STATUS_SUCCESS) {
@@ -730,6 +731,10 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
case OVS_VPORT_TYPE_STT:
status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
&ovsFwdCtx->tunKey, &newNbl);
+ if (status == NDIS_STATUS_SUCCESS && newNbl == NULL) {
+ /* This was an STT-LSO Fragment */
+ dropReason = L"OVS-STT segment is cached";
+ }
break;
default:
OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
@@ -747,25 +752,26 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
* tunnelRxNic and other fields will be cleared, re-init the context
* before usage.
*/
- OvsCompleteNBLForwardingCtx(ovsFwdCtx,
- L"OVS-dropped due to new decap packet");
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
- /* Decapsulated packet is in a new NBL */
- ovsFwdCtx->tunnelRxNic = tunnelRxVport;
- OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
- newNbl, tunnelRxVport->portNo, 0,
- NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
- ovsFwdCtx->completionList,
- &ovsFwdCtx->layers, FALSE);
+ if (newNbl) {
+ /* Decapsulated packet is in a new NBL */
+ ovsFwdCtx->tunnelRxNic = tunnelRxVport;
+ OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
+ newNbl, tunnelRxVport->portNo, 0,
+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
+ ovsFwdCtx->completionList,
+ &ovsFwdCtx->layers, FALSE);
- /*
- * Set the NBL's SourcePortId and SourceNicIndex to default values to
- * keep NDIS happy when we forward the packet.
- */
- ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
- ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
+ /*
+ * Set the NBL's SourcePortId and SourceNicIndex to default values to
+ * keep NDIS happy when we forward the packet.
+ */
+ ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
+ ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
- status = OvsDoFlowLookupOutput(ovsFwdCtx);
+ status = OvsDoFlowLookupOutput(ovsFwdCtx);
+ }
ASSERT(ovsFwdCtx->curNbl == NULL);
OvsClearTunRxCtx(ovsFwdCtx);
diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c
index b78ef952c..ef44d237c 100644
--- a/datapath-windows/ovsext/Stt.c
+++ b/datapath-windows/ovsext/Stt.c
@@ -34,6 +34,7 @@
#endif
#define OVS_DBG_MOD OVS_DBG_STT
#include "Debug.h"
+#include "Jhash.h"
KSTART_ROUTINE OvsSttDefragCleaner;
static PLIST_ENTRY OvsSttPktFragHash;
@@ -152,8 +153,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
UINT32 headRoom = OvsGetSttTunHdrSize();
UINT32 tcpChksumLen;
PUINT8 bufferStart;
-
- UNREFERENCED_PARAMETER(layers);
+ ULONG mss = 0;
+ NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
@@ -162,14 +163,20 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
BOOLEAN innerPartialChecksum = FALSE;
if (layers->isTcp) {
- NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
-
lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
TcpLargeSendNetBufferListInfo);
- if (lsoInfo.LsoV1Transmit.MSS) {
- /* XXX We don't handle LSO yet */
- OVS_LOG_ERROR("LSO on STT is not supported");
- return NDIS_STATUS_FAILURE;
+
+ switch (lsoInfo.Transmit.Type) {
+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
+ mss = lsoInfo.LsoV1Transmit.MSS;
+ break;
+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
+ mss = lsoInfo.LsoV2Transmit.MSS;
+ break;
+ default:
+ OVS_LOG_ERROR("Unknown LSO transmit type:%d",
+ lsoInfo.Transmit.Type);
+ return NDIS_STATUS_FAILURE;
}
}
@@ -186,21 +193,36 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
return NDIS_STATUS_FAILURE;
}
- curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
+ curNbl = *newNbl;
+ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ /* NB Chain should be split before */
+ ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
+ innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
+
bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
LowPagePriority);
bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
- if (layers->isIPv4 && csumInfo.Transmit.IpHeaderChecksum) {
+ if (layers->isIPv4) {
IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
- ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
+ if (!ip->tot_len) {
+ ip->tot_len = htons(innerFrameLen - sizeof(EthHdr));
+ }
+ if (!ip->check) {
+ ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
+ }
}
+
if (layers->isTcp) {
- if(!csumInfo.Transmit.TcpChecksum) {
- innerChecksumVerified = TRUE;
- } else {
+ if (mss) {
innerPartialChecksum = TRUE;
+ } else {
+ if (!csumInfo.Transmit.TcpChecksum) {
+ innerChecksumVerified = TRUE;
+ } else {
+ innerPartialChecksum = TRUE;
+ }
}
} else if (layers->isUdp) {
if(!csumInfo.Transmit.UdpChecksum) {
@@ -210,24 +232,6 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
}
}
- curNbl = *newNbl;
- curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
- /* NB Chain should be split before */
- ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
-
- innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
- /*
- * External port can't be removed as we hold the dispatch lock
- * We also check if the external port was removed beforecalling
- * port encapsulation functions
- */
- if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) {
- OVS_LOG_ERROR("Packet too large (size %d, mtu %d). Can't encapsulate",
- innerFrameLen, OvsGetExternalMtu(switchContext));
- status = NDIS_STATUS_FAILURE;
- goto ret_error;
- }
-
status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
if (status != NDIS_STATUS_SUCCESS) {
ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)");
@@ -301,33 +305,52 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
IPPROTO_TCP, (uint16) tcpChksumLen);
sttHdr->version = 0;
- /* XXX need to peek into the inner packet, hard code for now */
- sttHdr->flags = STT_PROTO_IPV4;
- if (innerChecksumVerified) {
- sttHdr->flags |= STT_CSUM_VERIFIED;
- } else if (innerPartialChecksum) {
+ /* Set STT Header */
+ sttHdr->flags = 0;
+ if (innerPartialChecksum) {
sttHdr->flags |= STT_CSUM_PARTIAL;
+ if (layers->isIPv4) {
+ sttHdr->flags |= STT_PROTO_IPV4;
+ }
+ if (layers->isTcp) {
+ sttHdr->flags |= STT_PROTO_TCP;
+ }
+ sttHdr->l4Offset = (UINT8) layers->l4Offset;
+ sttHdr->mss = (UINT16) htons(mss);
+ } else if (innerChecksumVerified) {
+ sttHdr->flags = STT_CSUM_VERIFIED;
+ sttHdr->l4Offset = 0;
+ sttHdr->mss = 0;
}
- sttHdr->l4Offset = 0;
sttHdr->reserved = 0;
- /* XXX Used for large TCP packets.Not sure how it is used, clarify */
- sttHdr->mss = 0;
sttHdr->vlanTCI = 0;
sttHdr->key = tunKey->tunnelId;
/* Zero out stt padding */
*(uint16 *)(sttHdr + 1) = 0;
/* Offload IP and TCP checksum */
+ ULONG tcpHeaderOffset = sizeof *outerEthHdr +
+ outerIpHdr->ihl * 4;
csumInfo.Value = 0;
csumInfo.Transmit.IpHeaderChecksum = 1;
csumInfo.Transmit.TcpChecksum = 1;
csumInfo.Transmit.IsIPv4 = 1;
- csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr +
- outerIpHdr->ihl * 4;
+ csumInfo.Transmit.TcpHeaderOffset = tcpHeaderOffset;
NET_BUFFER_LIST_INFO(curNbl,
TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
+ UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - sizeof(TCPHdr);
+ if (ipTotalLen > encapMss) {
+ lsoInfo.Value = 0;
+ lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset;
+ lsoInfo.LsoV2Transmit.MSS = encapMss;
+ lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
+ lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
+ NET_BUFFER_LIST_INFO(curNbl,
+ TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
+ }
+
return STATUS_SUCCESS;
ret_error:
@@ -338,16 +361,22 @@ ret_error:
/*
*----------------------------------------------------------------------------
- * OvsCalculateTCPChecksum
- * Calculate TCP checksum
+ * OvsValidateTCPChecksum
+ * Validate TCP checksum
*----------------------------------------------------------------------------
*/
static __inline NDIS_STATUS
-OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
+OvsValidateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
{
NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
TcpIpChecksumNetBufferListInfo);
+
+ /* Check if NIC has indicated TCP checksum failure */
+ if (csumInfo.Receive.TcpChecksumFailed) {
+ return NDIS_STATUS_INVALID_PACKET;
+ }
+
UINT16 checkSum;
/* Check if TCP Checksum has been calculated by NIC */
@@ -399,10 +428,9 @@ OvsInitSttDefragmentation()
NdisAllocateSpinLock(&OvsSttSpinLock);
/* Init the Hash Buffer */
- OvsSttPktFragHash = (PLIST_ENTRY) OvsAllocateMemoryWithTag(
- sizeof(LIST_ENTRY)
- * STT_HASH_TABLE_SIZE,
- OVS_STT_POOL_TAG);
+ OvsSttPktFragHash = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
+ * STT_HASH_TABLE_SIZE,
+ OVS_STT_POOL_TAG);
if (OvsSttPktFragHash == NULL) {
NdisFreeSpinLock(&OvsSttSpinLock);
return STATUS_INSUFFICIENT_RESOURCES;
@@ -487,6 +515,7 @@ OvsSttDefragCleaner(PVOID data)
entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
if (entry->timeout < currentTime) {
RemoveEntryList(&entry->link);
+ OvsFreeMemoryWithTag(entry->packetBuf, OVS_STT_POOL_TAG);
OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
}
}
@@ -500,6 +529,158 @@ OvsSttDefragCleaner(PVOID data)
PsTerminateSystemThread(STATUS_SUCCESS);
}
+/*
+ * Build the lookup key for a received STT segment from its outer headers:
+ * the IP source and destination addresses exactly as stored in the header,
+ * plus the TCP ack_seq field converted to host byte order.
+ */
+static OVS_STT_PKT_KEY
+OvsGeneratePacketKey(IPHdr *ipHdr, TCPHdr *tcpHdr)
+{
+    OVS_STT_PKT_KEY pktKey;
+
+    pktKey.ackSeq = ntohl(tcpHdr->ack_seq);
+    pktKey.dAddr = ipHdr->daddr;
+    pktKey.sAddr = ipHdr->saddr;
+
+    return pktKey;
+}
+
+/*
+ * Hash a packet key by feeding its three 32-bit fields, in the order
+ * ackSeq, dAddr, sAddr, to OvsJhashWords() with OVS_HASH_BASIS.
+ */
+static UINT32
+OvsSttGetPktHash(OVS_STT_PKT_KEY *pktKey)
+{
+    UINT32 words[3] = { pktKey->ackSeq, pktKey->dAddr, pktKey->sAddr };
+
+    return OvsJhashWords(words, 3, OVS_HASH_BASIS);
+}
+
+/*
+ * Walk the hash bucket selected by 'hash' and return the first
+ * OVS_STT_PKT_ENTRY whose key matches 'pktKey' on all three fields, or
+ * NULL when the bucket holds no such entry.  This function takes no lock
+ * itself.
+ */
+static VOID *
+OvsLookupPktFrag(OVS_STT_PKT_KEY *pktKey, UINT32 hash)
+{
+    PLIST_ENTRY bucket = &OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK];
+    PLIST_ENTRY cursor;
+
+    LIST_FORALL(bucket, cursor) {
+        POVS_STT_PKT_ENTRY candidate =
+            CONTAINING_RECORD(cursor, OVS_STT_PKT_ENTRY, link);
+
+        if (candidate->ovsPktKey.ackSeq != pktKey->ackSeq ||
+            candidate->ovsPktKey.dAddr != pktKey->dAddr ||
+            candidate->ovsPktKey.sAddr != pktKey->sAddr) {
+            continue;
+        }
+        return candidate;
+    }
+
+    return NULL;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsSttReassemble --
+ *     Reassemble an LSO packet from multiple STT-Fragments.
+ *
+ *     The outer TCP sequence number describes the segment: the bits above
+ *     STT_SEQ_LEN_SHIFT give the total length of the STT frame (STT header
+ *     included) and STT_SEGMENT_OFF() gives this segment's offset within
+ *     that frame.  'payloadLen' is the number of bytes that follow the
+ *     outer TCP header in 'curNbl'.
+ *
+ *     Segments are collected in a per-key entry (see OvsGeneratePacketKey)
+ *     in OvsSttPktFragHash, under OvsSttSpinLock.  Entries that never
+ *     complete are left in the table with their timeout set; they are not
+ *     released here.
+ *
+ *     Returns the reassembled inner packet as a new NBL when this segment
+ *     completes the frame, with the STT header saved from the first
+ *     segment copied to 'newSttHdr'.  Returns NULL when the segment was
+ *     only cached, when it is malformed, or when a resource could not be
+ *     obtained; 'newSttHdr' is not written in those cases.
+ *
+ *     NOTE(review): the entry does not record the length its buffer was
+ *     allocated with, so a later segment carrying the same key but a
+ *     different total length is bounds-checked against its own claimed
+ *     length only.  Completion is detected by summing received bytes, so
+ *     duplicate segments are not distinguished.  Both need a change to
+ *     OVS_STT_PKT_ENTRY, which is outside this function.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
+                 PNET_BUFFER_LIST curNbl,
+                 IPHdr *ipHdr,
+                 TCPHdr *tcp,
+                 SttHdr *newSttHdr,
+                 UINT16 payloadLen)
+{
+    UINT32 seq = ntohl(tcp->seq);
+    UINT32 totalLen = seq >> STT_SEQ_LEN_SHIFT;
+    UINT32 segOffset = STT_SEGMENT_OFF(seq);
+    UINT32 innerPacketLen;
+    UINT32 offset;
+    UINT32 startOffset = 0;
+    UINT32 fragmentLength = payloadLen;
+    UINT32 hash;
+    OVS_STT_PKT_KEY pktKey;
+    POVS_STT_PKT_ENTRY pktFragEntry;
+    PNET_BUFFER_LIST targetPNbl = NULL;
+    SttHdr stt;
+    SttHdr *sttHdr = NULL;
+
+    /*
+     * Everything below is derived from header fields supplied by the
+     * sender, so validate before any arithmetic that could wrap around.
+     */
+    if (totalLen <= STT_HDR_LEN) {
+        OVS_LOG_ERROR("Invalid STT frame length in sequence number");
+        return NULL;
+    }
+    innerPacketLen = totalLen - STT_HDR_LEN;
+
+    if (segOffset == 0) {
+        /* First fragment: it starts with the STT header, save a copy. */
+        if (fragmentLength < STT_HDR_LEN) {
+            OVS_LOG_ERROR("First STT segment is shorter than the STT header");
+            return NULL;
+        }
+        sttHdr = NdisGetDataBuffer(NET_BUFFER_LIST_FIRST_NB(curNbl),
+                                   sizeof(SttHdr), &stt, 1, 0);
+        if (sttHdr == NULL) {
+            OVS_LOG_ERROR("Unable to retrieve STT header");
+            return NULL;
+        }
+        fragmentLength -= STT_HDR_LEN;
+        startOffset = STT_HDR_LEN;
+        offset = 0;
+    } else {
+        if (segOffset < STT_HDR_LEN) {
+            OVS_LOG_ERROR("Invalid STT segment offset");
+            return NULL;
+        }
+        offset = segOffset - STT_HDR_LEN;
+    }
+
+    /* The fragment must fit inside the reassembly buffer. */
+    if (offset > innerPacketLen || fragmentLength > innerPacketLen - offset) {
+        OVS_LOG_ERROR("STT segment does not fit in the reassembled packet");
+        return NULL;
+    }
+
+    pktKey = OvsGeneratePacketKey(ipHdr, tcp);
+    hash = OvsSttGetPktHash(&pktKey);
+
+    /* XXX optimize this lock */
+    NdisAcquireSpinLock(&OvsSttSpinLock);
+
+    /* Lookup fragment */
+    pktFragEntry = OvsLookupPktFrag(&pktKey, hash);
+
+    if (pktFragEntry == NULL) {
+        /* Create a new Packet Entry */
+        POVS_STT_PKT_ENTRY entry;
+        UINT64 currentTime;
+
+        entry = OvsAllocateMemoryWithTag(sizeof(OVS_STT_PKT_ENTRY),
+                                         OVS_STT_POOL_TAG);
+        if (entry == NULL) {
+            OVS_LOG_ERROR("Unable to allocate STT fragment entry");
+            goto unlock;
+        }
+        RtlZeroMemory(entry, sizeof (OVS_STT_PKT_ENTRY));
+
+        entry->packetBuf = OvsAllocateMemoryWithTag(innerPacketLen,
+                                                    OVS_STT_POOL_TAG);
+        if (entry->packetBuf == NULL) {
+            OVS_LOG_ERROR("Unable to allocate STT reassembly buffer");
+            OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
+            goto unlock;
+        }
+
+        /* Copy the data from Source to new buffer */
+        if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
+                              entry->packetBuf + offset) == NULL) {
+            OVS_LOG_ERROR("Error when obtaining bytes from Packet");
+            /* The entry was never published, so release it here. */
+            OvsFreeMemoryWithTag(entry->packetBuf, OVS_STT_POOL_TAG);
+            OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
+            goto unlock;
+        }
+
+        /* Update Key, timestamp and recvdLen */
+        NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof (OVS_STT_PKT_KEY));
+        entry->recvdLen = fragmentLength;
+        NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
+        entry->timeout = currentTime + STT_ENTRY_TIMEOUT;
+        if (sttHdr != NULL) {
+            entry->sttHdr = *sttHdr;
+        }
+
+        /* Insert the entry in the Static Buffer */
+        InsertHeadList(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK],
+                       &entry->link);
+        goto unlock;
+    }
+
+    /*
+     * Copy the fragment data from Source to existing buffer.  Account for
+     * the bytes only after the copy succeeded, so that a failed copy can
+     * never complete the packet.
+     */
+    if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
+                          pktFragEntry->packetBuf + offset) == NULL) {
+        OVS_LOG_ERROR("Error when obtaining bytes from Packet");
+        goto unlock;
+    }
+
+    if (sttHdr != NULL) {
+        pktFragEntry->sttHdr = *sttHdr;
+    }
+
+    /* Add to received length to identify if this is the last fragment */
+    pktFragEntry->recvdLen += fragmentLength;
+    if (pktFragEntry->recvdLen == innerPacketLen) {
+        /* Retrieve the original STT header */
+        NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof (SttHdr));
+        targetPNbl = OvsAllocateNBLFromBuffer(switchContext,
+                                              pktFragEntry->packetBuf,
+                                              innerPacketLen);
+
+        /* Delete this entry and free up the memory */
+        RemoveEntryList(&pktFragEntry->link);
+        OvsFreeMemoryWithTag(pktFragEntry->packetBuf, OVS_STT_POOL_TAG);
+        OvsFreeMemoryWithTag(pktFragEntry, OVS_STT_POOL_TAG);
+    }
+
+unlock:
+    NdisReleaseSpinLock(&OvsSttSpinLock);
+    return targetPNbl;
+}
+
/*
* --------------------------------------------------------------------------
* OvsDecapStt --
@@ -513,34 +694,20 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
PNET_BUFFER_LIST *newNbl)
{
NDIS_STATUS status = NDIS_STATUS_FAILURE;
- PNET_BUFFER curNb;
+ PNET_BUFFER curNb, newNb;
IPHdr *ipHdr;
char *ipBuf[sizeof(IPHdr)];
+ SttHdr stt;
SttHdr *sttHdr;
char *sttBuf[STT_HDR_LEN];
UINT32 advanceCnt, hdrLen;
- NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+ BOOLEAN isLsoPacket = FALSE;
curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
- if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) {
- OVS_LOG_ERROR("Packet length received is less than the tunnel header:"
- " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb), OvsGetSttTunHdrSize());
- return NDIS_STATUS_INVALID_LENGTH;
- }
-
- /* Verify outer TCP Checksum */
- csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
- TcpIpChecksumNetBufferListInfo);
-
- /* Check if NIC has indicated TCP checksum failure */
- if (csumInfo.Receive.TcpChecksumFailed) {
- return NDIS_STATUS_INVALID_PACKET;
- }
-
- /* Calculate the TCP Checksum */
- status = OvsCalculateTCPChecksum(curNbl, curNb);
+ /* Validate the TCP Checksum */
+ status = OvsValidateTCPChecksum(curNbl, curNb);
if (status != NDIS_STATUS_SUCCESS) {
return status;
}
@@ -554,34 +721,73 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
1 /*no align*/, 0);
ASSERT(ipHdr);
+ TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
+
/* Skip IP & TCP headers */
hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
advanceCnt += hdrLen;
- /* STT Header */
- sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf,
- 1 /*no align*/, 0);
+ UINT32 seq = ntohl(tcp->seq);
+ UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT);
+ UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len)
+ - (ipHdr->ihl * 4)
+ - (sizeof * tcp);
+
+ /* Check if incoming packet requires reassembly */
+ if (totalLen != payloadLen) {
+ sttHdr = &stt;
+ PNET_BUFFER_LIST pNbl = OvsSttReassemble(switchContext, curNbl,
+ ipHdr, tcp, sttHdr,
+ payloadLen);
+ if (pNbl == NULL) {
+ return NDIS_STATUS_SUCCESS;
+ }
+
+ *newNbl = pNbl;
+ isLsoPacket = TRUE;
+ } else {
+ /* STT Header */
+ sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr,
+ (PVOID) &sttBuf, 1 /*no align*/, 0);
+ /* Skip stt header, DataOffset points to inner pkt now. */
+ hdrLen = STT_HDR_LEN;
+ NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
+ advanceCnt += hdrLen;
+
+ *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0,
+ 0, FALSE /*copy NBL info*/);
+ }
+
+ if (*newNbl == NULL) {
+ OVS_LOG_ERROR("Unable to allocate a new cloned NBL");
+ return NDIS_STATUS_RESOURCES;
+ }
+
+ status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsCompleteNBL(switchContext, *newNbl, TRUE);
+ return NDIS_STATUS_FAILURE;
+ }
+ newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
+
ASSERT(sttHdr);
/* Initialize the tunnel key */
tunKey->dst = ipHdr->daddr;
tunKey->src = ipHdr->saddr;
tunKey->tunnelId = sttHdr->key;
- tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY);
+ tunKey->flags = OVS_TNL_F_KEY;
tunKey->tos = ipHdr->tos;
tunKey->ttl = ipHdr->ttl;
tunKey->pad = 0;
- /* Skip stt header, DataOffset points to inner pkt now. */
- hdrLen = STT_HDR_LEN;
- NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
- advanceCnt += hdrLen;
+ BOOLEAN requiresLSO = sttHdr->mss != 0;
/* Verify checksum for inner packet if it's required */
if (!(sttHdr->flags & STT_CSUM_VERIFIED)) {
BOOLEAN innerChecksumPartial = sttHdr->flags & STT_CSUM_PARTIAL;
- EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr),
+ EthHdr *eth = (EthHdr *)NdisGetDataBuffer(newNb, sizeof(EthHdr),
NULL, 1, 0);
/* XXX Figure out a way to offload checksum receives */
@@ -597,14 +803,16 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
IPPROTO_TCP,
(UINT16)l4Payload);
}
- tcp->check = CalculateChecksumNB(curNb, l4Payload, offset);
+ if (!requiresLSO) {
+ tcp->check = CalculateChecksumNB(newNb, l4Payload, offset);
+ }
} else if (ip->protocol == IPPROTO_UDP) {
UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
if (!innerChecksumPartial){
udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
IPPROTO_UDP, l4Payload);
}
- udp->check = CalculateChecksumNB(curNb, l4Payload, offset);
+ udp->check = CalculateChecksumNB(newNb, l4Payload, offset);
}
} else if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) {
IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth);
@@ -617,7 +825,9 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
(UINT32 *)&ip->daddr,
IPPROTO_TCP, totalLength);
}
- tcp->check = CalculateChecksumNB(curNb, totalLength, offset);
+ if (!requiresLSO) {
+ tcp->check = CalculateChecksumNB(newNb, totalLength, offset);
+ }
}
else if (ip->nexthdr == IPPROTO_UDP) {
UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
@@ -626,23 +836,27 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
(UINT32 *)&ip->daddr,
IPPROTO_UDP, totalLength);
}
- udp->check = CalculateChecksumNB(curNb, totalLength, offset);
+ udp->check = CalculateChecksumNB(newNb, totalLength, offset);
}
}
- NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
+ NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0;
}
- *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE,
- 0, FALSE /*copy NBL info*/);
-
- ASSERT(advanceCnt == OvsGetSttTunHdrSize());
- status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
-
- if (*newNbl == NULL) {
- OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned NBL");
- status = NDIS_STATUS_RESOURCES;
+ if (requiresLSO) {
+ NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
+ lsoInfo.Value = 0;
+ lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset;
+ lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU - sizeof(IPHdr) - sizeof(TCPHdr);
+ lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
+ if (sttHdr->flags & STT_PROTO_IPV4) {
+ lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
+ } else {
+ lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6;
+ }
+ NET_BUFFER_LIST_INFO(*newNbl,
+ TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
}
- return status;
+ return NDIS_STATUS_SUCCESS;
}