aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
author: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com> 2016-06-01 11:23:41 +0300
committer: Jason Wang <jasowang@redhat.com> 2016-06-02 10:42:28 +0800
commit: eb700029c7836798046191d62d595363d92c84d4 (patch)
tree: 7c6b3b8c3de169297291adba1ad959bc87d75a5b /net
parent: 66409b7c8bd0ebb075a6af8cbc7846fc0a95107d (diff)
net_pkt: Extend packet abstraction as required by e1000e functionality
This patch extends the TX/RX packet abstractions with features that will
be used by the e1000e device implementation.

Changes are:

1. Support iovec lists for RX buffers
2. Deeper RX packets parsing
3. Loopback option for TX packets
4. Extended VLAN headers handling
5. RSS processing for RX packets

Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Diffstat (limited to 'net')
-rw-r--r-- net/checksum.c | 7
-rw-r--r-- net/eth.c | 410
2 files changed, 373 insertions, 44 deletions
diff --git a/net/checksum.c b/net/checksum.c
index d0fa424cc1..196aaa3459 100644
--- a/net/checksum.c
+++ b/net/checksum.c
@@ -95,12 +95,11 @@ void net_checksum_calculate(uint8_t *data, int length)
uint32_t
net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt,
- uint32_t iov_off, uint32_t size)
+ uint32_t iov_off, uint32_t size, uint32_t csum_offset)
{
size_t iovec_off, buf_off;
unsigned int i;
uint32_t res = 0;
- uint32_t seq = 0;
iovec_off = 0;
buf_off = 0;
@@ -109,8 +108,8 @@ net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt,
size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size);
void *chunk_buf = iov[i].iov_base + (iov_off - iovec_off);
- res += net_checksum_add_cont(len, chunk_buf, seq);
- seq += len;
+ res += net_checksum_add_cont(len, chunk_buf, csum_offset);
+ csum_offset += len;
buf_off += len;
iov_off += len;
diff --git a/net/eth.c b/net/eth.c
index 7e32d274c7..95fe15c23f 100644
--- a/net/eth.c
+++ b/net/eth.c
@@ -21,8 +21,8 @@
#include "qemu-common.h"
#include "net/tap.h"
-void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
- bool *is_new)
+void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
+ uint16_t vlan_ethtype, bool *is_new)
{
struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
@@ -36,7 +36,7 @@ void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
default:
/* No VLAN header, put a new one */
vhdr->h_proto = ehdr->h_proto;
- ehdr->h_proto = cpu_to_be16(ETH_P_VLAN);
+ ehdr->h_proto = cpu_to_be16(vlan_ethtype);
*is_new = true;
break;
}
@@ -79,26 +79,100 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
}
-void eth_get_protocols(const uint8_t *headers,
- uint32_t hdr_length,
+/*
+ * Read the 16-bit protocol field stored in the last two bytes of an L2
+ * header that is l2hdr_len bytes long and held in the l2hdr_iov vector.
+ *
+ * Returns the field converted with be16_to_cpu(), or ETH_P_UNKNOWN when
+ * the vector is shorter than the field offset or the field cannot be
+ * copied in full.
+ */
+uint16_t
+eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
+{
+    uint16_t ethertype_be;
+    const size_t field_off = l2hdr_len - sizeof(ethertype_be);
+
+    /* Never hand iov_to_buf() an offset past the end of the vector */
+    if (iov_size(l2hdr_iov, iovcnt) < field_off) {
+        return ETH_P_UNKNOWN;
+    }
+
+    if (iov_to_buf(l2hdr_iov, iovcnt, field_off,
+                   &ethertype_be, sizeof(ethertype_be)) !=
+        sizeof(ethertype_be)) {
+        return ETH_P_UNKNOWN;
+    }
+
+    return be16_to_cpu(ethertype_be);
+}
+
+/*
+ * Copy exactly length bytes located at offset in the iov vector into
+ * buffer. input_size is the total size of the vector as computed by the
+ * caller.
+ *
+ * Returns true only when offset does not exceed input_size and all
+ * length bytes were copied; no copy is attempted when the offset check
+ * fails.
+ */
+static bool
+_eth_copy_chunk(size_t input_size,
+                const struct iovec *iov, int iovcnt,
+                size_t offset, size_t length,
+                void *buffer)
+{
+    return input_size >= offset &&
+           iov_to_buf(iov, iovcnt, offset, buffer, length) >= length;
+}
+
+/*
+ * Tell whether a TCP segment carries anything beyond its own header.
+ *
+ * The L4 length is the IPv4 total length minus the IPv4 header length
+ * when is_ip4 is set; otherwise it is the IPv6 payload length minus the
+ * extension headers (full_ip6hdr_len less the fixed IPv6 header). Only
+ * the IP header selected by is_ip4 is dereferenced.
+ *
+ * Returns true when that length exceeds TCP_HEADER_DATA_OFFSET(tcp).
+ */
+static bool
+_eth_tcp_has_data(bool is_ip4,
+                  const struct ip_header *ip4_hdr,
+                  const struct ip6_header *ip6_hdr,
+                  size_t full_ip6hdr_len,
+                  const struct tcp_header *tcp)
+{
+    uint32_t segment_len;
+
+    if (!is_ip4) {
+        size_t ext_hdrs_len = full_ip6hdr_len - sizeof(struct ip6_header);
+
+        segment_len =
+            be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - ext_hdrs_len;
+    } else {
+        segment_len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
+    }
+
+    return segment_len > TCP_HEADER_DATA_OFFSET(tcp);
+}
+
+void eth_get_protocols(const struct iovec *iov, int iovcnt,
bool *isip4, bool *isip6,
- bool *isudp, bool *istcp)
+ bool *isudp, bool *istcp,
+ size_t *l3hdr_off,
+ size_t *l4hdr_off,
+ size_t *l5hdr_off,
+ eth_ip6_hdr_info *ip6hdr_info,
+ eth_ip4_hdr_info *ip4hdr_info,
+ eth_l4_hdr_info *l4hdr_info)
{
int proto;
- size_t l2hdr_len = eth_get_l2_hdr_length(headers);
- assert(hdr_length >= eth_get_l2_hdr_length(headers));
+ bool fragment = false;
+ size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
+ size_t input_size = iov_size(iov, iovcnt);
+ size_t copied;
+
*isip4 = *isip6 = *isudp = *istcp = false;
- proto = eth_get_l3_proto(headers, l2hdr_len);
+ proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
+
+ *l3hdr_off = l2hdr_len;
+
if (proto == ETH_P_IP) {
- *isip4 = true;
+ struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
- struct ip_header *iphdr;
+ if (input_size < l2hdr_len) {
+ return;
+ }
+
+ copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
- assert(hdr_length >=
- eth_get_l2_hdr_length(headers) + sizeof(struct ip_header));
+ *isip4 = true;
- iphdr = PKT_GET_IP_HDR(headers);
+ if (copied < sizeof(*iphdr)) {
+ return;
+ }
if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
if (iphdr->ip_p == IP_PROTO_TCP) {
@@ -107,24 +181,135 @@ void eth_get_protocols(const uint8_t *headers,
*isudp = true;
}
}
- } else if (proto == ETH_P_IPV6) {
- uint8_t l4proto;
- size_t full_ip6hdr_len;
- struct iovec hdr_vec;
- hdr_vec.iov_base = (void *) headers;
- hdr_vec.iov_len = hdr_length;
+ ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
+ *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
+
+ fragment = ip4hdr_info->fragment;
+ } else if (proto == ETH_P_IPV6) {
*isip6 = true;
- if (eth_parse_ipv6_hdr(&hdr_vec, 1, l2hdr_len,
- &l4proto, &full_ip6hdr_len)) {
- if (l4proto == IP_PROTO_TCP) {
+ if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
+ ip6hdr_info)) {
+ if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
*istcp = true;
- } else if (l4proto == IP_PROTO_UDP) {
+ } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
*isudp = true;
}
+ } else {
+ return;
+ }
+
+ *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
+ fragment = ip6hdr_info->fragment;
+ }
+
+ if (!fragment) {
+ if (*istcp) {
+ *istcp = _eth_copy_chunk(input_size,
+ iov, iovcnt,
+ *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
+ &l4hdr_info->hdr.tcp);
+
+ if (istcp) {
+ *l5hdr_off = *l4hdr_off +
+ TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
+
+ l4hdr_info->has_tcp_data =
+ _eth_tcp_has_data(proto == ETH_P_IP,
+ &ip4hdr_info->ip4_hdr,
+ &ip6hdr_info->ip6_hdr,
+ *l4hdr_off - *l3hdr_off,
+ &l4hdr_info->hdr.tcp);
+ }
+ } else if (*isudp) {
+ *isudp = _eth_copy_chunk(input_size,
+ iov, iovcnt,
+ *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
+ &l4hdr_info->hdr.udp);
+ *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
+ }
+ }
+}
+
+/*
+ * Build a copy of the Ethernet header found at iovoff with its outer
+ * VLAN tag removed.
+ *
+ * The header is copied into new_ehdr_buf. When its protocol field is
+ * ETH_P_VLAN or ETH_P_DVLAN, the tag that follows is consumed: *tci
+ * receives its TCI and the copied header takes over the tag's protocol
+ * field. If that protocol is ETH_P_VLAN again, the next tag is copied to
+ * PKT_GET_VLAN_HDR() of the new header and skipped as well.
+ * *payload_offset is set to the offset just past the consumed tags.
+ *
+ * NOTE(review): new_ehdr_buf presumably has to hold an Ethernet header
+ * plus one VLAN header, since the second tag is written behind the
+ * header - confirm against callers.
+ *
+ * Returns false when the frame is untagged or too short; in the latter
+ * case the output arguments may already have been partially written.
+ */
+bool
+eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
+               uint8_t *new_ehdr_buf,
+               uint16_t *payload_offset, uint16_t *tci)
+{
+    struct eth_header *ehdr_out = (struct eth_header *) new_ehdr_buf;
+    struct vlan_header outer_tag;
+    uint16_t ethertype;
+
+    if (iov_to_buf(iov, iovcnt, iovoff, ehdr_out, sizeof(*ehdr_out)) <
+        sizeof(*ehdr_out)) {
+        return false;
+    }
+
+    ethertype = be16_to_cpu(ehdr_out->h_proto);
+    if (ethertype != ETH_P_VLAN && ethertype != ETH_P_DVLAN) {
+        return false;
+    }
+
+    if (iov_to_buf(iov, iovcnt, iovoff + sizeof(*ehdr_out),
+                   &outer_tag, sizeof(outer_tag)) < sizeof(outer_tag)) {
+        return false;
+    }
+
+    ehdr_out->h_proto = outer_tag.h_proto;
+
+    *tci = be16_to_cpu(outer_tag.h_tci);
+    *payload_offset = iovoff + sizeof(*ehdr_out) + sizeof(outer_tag);
+
+    if (be16_to_cpu(ehdr_out->h_proto) != ETH_P_VLAN) {
+        return true;
+    }
+
+    /* A second tag follows: keep it right behind the rebuilt header */
+    if (iov_to_buf(iov, iovcnt, *payload_offset,
+                   PKT_GET_VLAN_HDR(ehdr_out), sizeof(outer_tag)) <
+        sizeof(outer_tag)) {
+        return false;
+    }
+
+    *payload_offset += sizeof(outer_tag);
+    return true;
+}
+
+bool
+eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
+ uint16_t vet, uint8_t *new_ehdr_buf,
+ uint16_t *payload_offset, uint16_t *tci)
+{
+ struct vlan_header vlan_hdr;
+ struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
+
+ size_t copied = iov_to_buf(iov, iovcnt, iovoff,
+ new_ehdr, sizeof(*new_ehdr));
+
+ if (copied < sizeof(*new_ehdr)) {
+ return false;
+ }
+
+ if (be16_to_cpu(new_ehdr->h_proto) == vet) {
+ copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
+ &vlan_hdr, sizeof(vlan_hdr));
+
+ if (copied < sizeof(vlan_hdr)) {
+ return false;
}
+
+ new_ehdr->h_proto = vlan_hdr.h_proto;
+
+ *tci = be16_to_cpu(vlan_hdr.h_tci);
+ *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
+ return true;
}
+
+ return false;
}
void
@@ -133,7 +318,12 @@ eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
size_t l3payload_len,
size_t frag_offset, bool more_frags)
{
- if (eth_get_l3_proto(l2hdr, l2hdr_len) == ETH_P_IP) {
+ const struct iovec l2vec = {
+ .iov_base = (void *) l2hdr,
+ .iov_len = l2hdr_len
+ };
+
+ if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
uint16_t orig_flags;
struct ip_header *iphdr = (struct ip_header *) l3hdr;
uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
@@ -158,7 +348,9 @@ eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
}
uint32_t
-eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl)
+eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
+ uint16_t csl,
+ uint32_t *cso)
{
struct ip_pseudo_header ipph;
ipph.ip_src = iphdr->ip_src;
@@ -166,7 +358,26 @@ eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl)
ipph.ip_payload = cpu_to_be16(csl);
ipph.ip_proto = iphdr->ip_p;
ipph.zeros = 0;
- return net_checksum_add(sizeof(ipph), (uint8_t *) &ipph);
+ *cso = sizeof(ipph);
+ return net_checksum_add(*cso, (uint8_t *) &ipph);
+}
+
+uint32_t
+eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
+ uint16_t csl,
+ uint8_t l4_proto,
+ uint32_t *cso)
+{
+ struct ip6_pseudo_header ipph;
+ ipph.ip6_src = iphdr->ip6_src;
+ ipph.ip6_dst = iphdr->ip6_dst;
+ ipph.len = cpu_to_be16(csl);
+ ipph.zero[0] = 0;
+ ipph.zero[1] = 0;
+ ipph.zero[2] = 0;
+ ipph.next_hdr = l4_proto;
+ *cso = sizeof(ipph);
+ return net_checksum_add(*cso, (uint8_t *)&ipph);
}
static bool
@@ -186,33 +397,152 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type)
}
}
-bool eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags,
- size_t ip6hdr_off, uint8_t *l4proto,
- size_t *full_hdr_len)
+/*
+ * Fetch the address carried by a type 2 IPv6 routing header so that it
+ * can be reported as the extended RSS destination address.
+ *
+ * pkt/pkt_frags  packet data
+ * rthdr_offset   offset of the routing extension header inside pkt
+ * ext_hdr        generic extension header already read by the caller;
+ *                not dereferenced here because it only holds the fields
+ *                common to all extension headers
+ * dst_addr       receives the address on success
+ *
+ * The routing header is read from the packet into a local of the proper
+ * type instead of being accessed through a cast of ext_hdr, which the
+ * caller filled with sizeof(struct ip6_ext_hdr) bytes only.
+ *
+ * Returns true only when the header has routing type 2, a length field
+ * matching one address, segleft == 1, and the complete address could be
+ * copied.
+ */
+static bool
+_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
+                        size_t rthdr_offset,
+                        struct ip6_ext_hdr *ext_hdr,
+                        struct in6_address *dst_addr)
+{
+    struct ip6_ext_hdr_routing rt_hdr;
+    size_t input_size = iov_size(pkt, pkt_frags);
+    size_t bytes_read;
+
+    /* Header and address must both lie inside the packet */
+    if (input_size < rthdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
+        return false;
+    }
+
+    bytes_read = iov_to_buf(pkt, pkt_frags, rthdr_offset,
+                            &rt_hdr, sizeof(rt_hdr));
+
+    if (bytes_read != sizeof(rt_hdr)) {
+        return false;
+    }
+
+    if ((rt_hdr.rtype != 2) ||
+        (rt_hdr.len != sizeof(struct in6_address) / 8) ||
+        (rt_hdr.segleft != 1)) {
+        return false;
+    }
+
+    /*
+     * The address follows the whole routing header, not just the generic
+     * extension header part of it. Use the size of the pointed-to
+     * address: sizeof(dst_addr) would be the size of a pointer.
+     */
+    bytes_read = iov_to_buf(pkt, pkt_frags,
+                            rthdr_offset + sizeof(rt_hdr),
+                            dst_addr, sizeof(*dst_addr));
+
+    return bytes_read == sizeof(*dst_addr);
+}
+
+/*
+ * Walk the options of an IPv6 destination options header looking for an
+ * IP6_OPT_HOME option and copy the address it carries so that it can be
+ * reported as the extended RSS source address.
+ *
+ * pkt/pkt_frags  packet data
+ * dsthdr_offset  offset of the destination options header inside pkt
+ * ext_hdr        generic extension header already read by the caller;
+ *                its length field bounds the option walk
+ * src_addr       receives the address on success
+ *
+ * Returns true only when the option is found and the complete address
+ * could be copied; false when the options are malformed, truncated, or
+ * do not include IP6_OPT_HOME.
+ */
+static bool
+_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
+                        size_t dsthdr_offset,
+                        struct ip6_ext_hdr *ext_hdr,
+                        struct in6_address *src_addr)
+{
+    size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
+    struct ip6_option_hdr opthdr;
+    size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
+    /* The packet size does not change while walking: compute it once */
+    const size_t input_size = iov_size(pkt, pkt_frags);
+
+    while (bytes_left > sizeof(opthdr)) {
+        size_t bytes_read, optlen;
+
+        if (input_size < opt_offset) {
+            return false;
+        }
+
+        bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
+                                &opthdr, sizeof(opthdr));
+
+        if (bytes_read != sizeof(opthdr)) {
+            return false;
+        }
+
+        /* IP6_OPT_PAD1 is a lone byte; other options carry a length */
+        optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
+                                               : (opthdr.len + sizeof(opthdr));
+
+        if (optlen > bytes_left) {
+            return false;
+        }
+
+        if (opthdr.type == IP6_OPT_HOME) {
+            if (input_size < opt_offset + sizeof(opthdr)) {
+                return false;
+            }
+
+            /*
+             * Copy and validate the full address: sizeof(src_addr) would
+             * be the size of a pointer, not of struct in6_address.
+             */
+            bytes_read = iov_to_buf(pkt, pkt_frags,
+                                    opt_offset + sizeof(opthdr),
+                                    src_addr, sizeof(*src_addr));
+
+            return bytes_read == sizeof(*src_addr);
+        }
+
+        opt_offset += optlen;
+        bytes_left -= optlen;
+    }
+
+    return false;
+}
+
+bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
+ size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
- struct ip6_header ip6_hdr;
struct ip6_ext_hdr ext_hdr;
size_t bytes_read;
+ uint8_t curr_ext_hdr_type;
+ size_t input_size = iov_size(pkt, pkt_frags);
+
+ info->rss_ex_dst_valid = false;
+ info->rss_ex_src_valid = false;
+ info->fragment = false;
+
+ if (input_size < ip6hdr_off) {
+ return false;
+ }
bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
- &ip6_hdr, sizeof(ip6_hdr));
- if (bytes_read < sizeof(ip6_hdr)) {
+ &info->ip6_hdr, sizeof(info->ip6_hdr));
+ if (bytes_read < sizeof(info->ip6_hdr)) {
return false;
}
- *full_hdr_len = sizeof(struct ip6_header);
+ info->full_hdr_len = sizeof(struct ip6_header);
+
+ curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
- if (!eth_is_ip6_extension_header_type(ip6_hdr.ip6_nxt)) {
- *l4proto = ip6_hdr.ip6_nxt;
+ if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
+ info->l4proto = info->ip6_hdr.ip6_nxt;
+ info->has_ext_hdrs = false;
return true;
}
+ info->has_ext_hdrs = true;
+
do {
- bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + *full_hdr_len,
+ if (input_size < ip6hdr_off + info->full_hdr_len) {
+ return false;
+ }
+
+ bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
&ext_hdr, sizeof(ext_hdr));
- *full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
- } while (eth_is_ip6_extension_header_type(ext_hdr.ip6r_nxt));
- *l4proto = ext_hdr.ip6r_nxt;
+ if (bytes_read < sizeof(ext_hdr)) {
+ return false;
+ }
+
+ if (curr_ext_hdr_type == IP6_ROUTING) {
+ info->rss_ex_dst_valid =
+ _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
+ ip6hdr_off + info->full_hdr_len,
+ &ext_hdr, &info->rss_ex_dst);
+ } else if (curr_ext_hdr_type == IP6_DESTINATON) {
+ info->rss_ex_src_valid =
+ _eth_get_rss_ex_src_addr(pkt, pkt_frags,
+ ip6hdr_off + info->full_hdr_len,
+ &ext_hdr, &info->rss_ex_src);
+ } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
+ info->fragment = true;
+ }
+
+ info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
+ curr_ext_hdr_type = ext_hdr.ip6r_nxt;
+ } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
+
+ info->l4proto = ext_hdr.ip6r_nxt;
return true;
}