aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Peter Maydell <peter.maydell@linaro.org> 2018-01-29 14:29:17 +0000
committer: Peter Maydell <peter.maydell@linaro.org> 2018-01-29 14:29:17 +0000
commit: 0d1442912b3d6591a57ee45fbf28bc91e58d9e76 (patch)
tree: 5150b12645c0d413fe8f3b1da83e607ffa497f4f
parent: fccfcc6328d1986ce849183d37901a7cf59ed3ce (diff)
parent: bf4835a4d5338bb7424827715df22570a8adc67c (diff)
Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging

# gpg: Signature made Mon 29 Jan 2018 08:14:19 GMT
# gpg: using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg: It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  MAINTAINERS: update Dmitry Fleytman email
  qemu-doc: Get rid of "vlan=X" example in the documentation
  net: Allow netdevs to be used with 'hostfwd_add' and 'hostfwd_remove'
  net: Allow hubports to connect to other netdevs
  colo: compare the packet based on the tcp sequence number
  colo: modified the payload compare function

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- MAINTAINERS 8
-rw-r--r-- hmp-commands.hx 4
-rw-r--r-- net/colo-compare.c 411
-rw-r--r-- net/colo.c 9
-rw-r--r-- net/colo.h 15
-rw-r--r-- net/hub.c 27
-rw-r--r-- net/hub.h 3
-rw-r--r-- net/net.c 2
-rw-r--r-- net/slirp.c 33
-rw-r--r-- net/trace-events 2
-rw-r--r-- qapi/net.json 4
-rw-r--r-- qemu-options.hx 12
12 files changed, 347 insertions, 183 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 753e7996ce..fe39b30450 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1163,7 +1163,7 @@ F: hw/scsi/mfi.h
F: tests/megasas-test.c
Network packet abstractions
-M: Dmitry Fleytman <dmitry@daynix.com>
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
S: Maintained
F: include/net/eth.h
F: net/eth.c
@@ -1171,7 +1171,7 @@ F: hw/net/net_rx_pkt*
F: hw/net/net_tx_pkt*
Vmware
-M: Dmitry Fleytman <dmitry@daynix.com>
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
S: Maintained
F: hw/net/vmxnet*
F: hw/scsi/vmw_pvscsi*
@@ -1192,12 +1192,12 @@ F: hw/mem/nvdimm.c
F: include/hw/mem/nvdimm.h
e1000x
-M: Dmitry Fleytman <dmitry@daynix.com>
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
S: Maintained
F: hw/net/e1000x*
e1000e
-M: Dmitry Fleytman <dmitry@daynix.com>
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
S: Maintained
F: hw/net/e1000e*
diff --git a/hmp-commands.hx b/hmp-commands.hx
index c36a9ec465..15620c94d3 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1383,7 +1383,7 @@ ETEXI
{
.name = "hostfwd_add",
.args_type = "arg1:s,arg2:s?,arg3:s?",
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
.help = "redirect TCP or UDP connections from host to guest (requires -net user)",
.cmd = hmp_hostfwd_add,
},
@@ -1398,7 +1398,7 @@ ETEXI
{
.name = "hostfwd_remove",
.args_type = "arg1:s,arg2:s?,arg3:s?",
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport",
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport",
.help = "remove host-to-guest TCP or UDP redirection",
.cmd = hmp_hostfwd_remove,
},
diff --git a/net/colo-compare.c b/net/colo-compare.c
index 0ebdec936c..8622b0b35a 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -37,6 +37,9 @@
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
#define MAX_QUEUE_SIZE 1024
+#define COLO_COMPARE_FREE_PRIMARY 0x01
+#define COLO_COMPARE_FREE_SECONDARY 0x02
+
/* TODO: Should be configurable */
#define REGULAR_PACKET_CHECK_MS 3000
@@ -111,14 +114,32 @@ static gint seq_sorter(Packet *a, Packet *b, gpointer data)
return ntohl(atcp->th_seq) - ntohl(btcp->th_seq);
}
+static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
+{
+ Packet *pkt = data;
+ struct tcphdr *tcphd;
+
+ tcphd = (struct tcphdr *)pkt->transport_header;
+
+ pkt->tcp_seq = ntohl(tcphd->th_seq);
+ pkt->tcp_ack = ntohl(tcphd->th_ack);
+ *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack;
+ pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data
+ + (tcphd->th_off << 2) - pkt->vnet_hdr_len;
+ pkt->payload_size = pkt->size - pkt->header_size;
+ pkt->seq_end = pkt->tcp_seq + pkt->payload_size;
+ pkt->flags = tcphd->th_flags;
+}
+
/*
* Return 1 on success, if return 0 means the
* packet will be dropped
*/
-static int colo_insert_packet(GQueue *queue, Packet *pkt)
+static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
{
if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) {
if (pkt->ip->ip_p == IPPROTO_TCP) {
+ fill_pkt_tcp_info(pkt, max_ack);
g_queue_insert_sorted(queue,
pkt,
(GCompareDataFunc)seq_sorter,
@@ -168,12 +189,12 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
}
if (mode == PRIMARY_IN) {
- if (!colo_insert_packet(&conn->primary_list, pkt)) {
+ if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) {
error_report("colo compare primary queue size too big,"
"drop packet");
}
} else {
- if (!colo_insert_packet(&conn->secondary_list, pkt)) {
+ if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) {
error_report("colo compare secondary queue size too big,"
"drop packet");
}
@@ -183,6 +204,25 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
return 0;
}
+static inline bool after(uint32_t seq1, uint32_t seq2)
+{
+ return (int32_t)(seq1 - seq2) > 0;
+}
+
+static void colo_release_primary_pkt(CompareState *s, Packet *pkt)
+{
+ int ret;
+ ret = compare_chr_send(s,
+ pkt->data,
+ pkt->size,
+ pkt->vnet_hdr_len);
+ if (ret < 0) {
+ error_report("colo send primary packet failed");
+ }
+ trace_colo_compare_main("packet same and release packet");
+ packet_destroy(pkt, NULL);
+}
+
/*
* The IP packets sent by primary and secondary
* will be compared in here
@@ -190,10 +230,12 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
* return: 0 means packet same
* > 0 || < 0 means packet different
*/
-static int colo_packet_compare_common(Packet *ppkt,
- Packet *spkt,
- int poffset,
- int soffset)
+static int colo_compare_packet_payload(Packet *ppkt,
+ Packet *spkt,
+ uint16_t poffset,
+ uint16_t soffset,
+ uint16_t len)
+
{
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
@@ -208,131 +250,187 @@ static int colo_packet_compare_common(Packet *ppkt,
sec_ip_src, sec_ip_dst);
}
- poffset = ppkt->vnet_hdr_len + poffset;
- soffset = ppkt->vnet_hdr_len + soffset;
-
- if (ppkt->size - poffset == spkt->size - soffset) {
- return memcmp(ppkt->data + poffset,
- spkt->data + soffset,
- spkt->size - soffset);
- } else {
- trace_colo_compare_main("Net packet size are not the same");
- return -1;
- }
+ return memcmp(ppkt->data + poffset, spkt->data + soffset, len);
}
/*
- * Called from the compare thread on the primary
- * for compare tcp packet
- * compare_tcp copied from Dr. David Alan Gilbert's branch
- */
-static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
+ * Return true means that the payload is consistent and
+ * the next comparison needs to be made; false means do
+ * the checkpoint
+*/
+static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
+ int8_t *mark, uint32_t max_ack)
{
- struct tcphdr *ptcp, *stcp;
- int res;
+ *mark = 0;
+
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
+ if (colo_compare_packet_payload(ppkt, spkt,
+ ppkt->header_size, spkt->header_size,
+ ppkt->payload_size)) {
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
+ return true;
+ }
+ }
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
+ if (colo_compare_packet_payload(ppkt, spkt,
+ ppkt->header_size, spkt->header_size,
+ ppkt->payload_size)) {
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
+ return true;
+ }
+ }
+
+ /* one part of secondary packet payload still need to be compared */
+ if (!after(ppkt->seq_end, spkt->seq_end)) {
+ if (colo_compare_packet_payload(ppkt, spkt,
+ ppkt->header_size + ppkt->offset,
+ spkt->header_size + spkt->offset,
+ ppkt->payload_size - ppkt->offset)) {
+ if (!after(ppkt->tcp_ack, max_ack)) {
+ *mark = COLO_COMPARE_FREE_PRIMARY;
+ spkt->offset += ppkt->payload_size - ppkt->offset;
+ return true;
+ } else {
+ /* secondary guest hasn't ack the data, don't send
+ * out this packet
+ */
+ return false;
+ }
+ }
+ } else {
+ /* primary packet is longer than secondary packet, compare
+ * the same part and mark the primary packet offset
+ */
+ if (colo_compare_packet_payload(ppkt, spkt,
+ ppkt->header_size + ppkt->offset,
+ spkt->header_size + spkt->offset,
+ spkt->payload_size - spkt->offset)) {
+ *mark = COLO_COMPARE_FREE_SECONDARY;
+ ppkt->offset += spkt->payload_size - spkt->offset;
+ return true;
+ }
+ }
- trace_colo_compare_main("compare tcp");
+ return false;
+}
- ptcp = (struct tcphdr *)ppkt->transport_header;
- stcp = (struct tcphdr *)spkt->transport_header;
+static void colo_compare_tcp(CompareState *s, Connection *conn)
+{
+ Packet *ppkt = NULL, *spkt = NULL;
+ int8_t mark;
/*
- * The 'identification' field in the IP header is *very* random
- * it almost never matches. Fudge this by ignoring differences in
- * unfragmented packets; they'll normally sort themselves out if different
- * anyway, and it should recover at the TCP level.
- * An alternative would be to get both the primary and secondary to rewrite
- * somehow; but that would need some sync traffic to sync the state
- */
- if (ntohs(ppkt->ip->ip_off) & IP_DF) {
- spkt->ip->ip_id = ppkt->ip->ip_id;
- /* and the sum will be different if the IDs were different */
- spkt->ip->ip_sum = ppkt->ip->ip_sum;
+ * If ppkt and spkt have the same payload, but ppkt's ACK
+ * is greater than spkt's ACK, we cannot send the ppkt,
+ * because it would cause the secondary guest to miss
+ * sending some data later on. Therefore, we record the
+ * maximum ACK in the current queue on both the primary
+ * side and the secondary side. Only when a packet's ACK
+ * does not exceed the smaller of the two maximum ACKs
+ * can we be sure that the packet's payload is
+ * acknowledged by both primary and secondary.
+ */
+ uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack;
+
+pri:
+ if (g_queue_is_empty(&conn->primary_list)) {
+ return;
+ }
+ ppkt = g_queue_pop_head(&conn->primary_list);
+sec:
+ if (g_queue_is_empty(&conn->secondary_list)) {
+ g_queue_push_head(&conn->primary_list, ppkt);
+ return;
}
+ spkt = g_queue_pop_head(&conn->secondary_list);
- /*
- * Check tcp header length for tcp option field.
- * th_off > 5 means this tcp packet have options field.
- * The tcp options maybe always different.
- * for example:
- * From RFC 7323.
- * TCP Timestamps option (TSopt):
- * Kind: 8
- *
- * Length: 10 bytes
- *
- * +-------+-------+---------------------+---------------------+
- * |Kind=8 | 10 | TS Value (TSval) |TS Echo Reply (TSecr)|
- * +-------+-------+---------------------+---------------------+
- * 1 1 4 4
- *
- * In this case the primary guest's timestamp always different with
- * the secondary guest's timestamp. COLO just focus on payload,
- * so we just need skip this field.
- */
- if (ptcp->th_off > 5) {
- ptrdiff_t ptcp_offset, stcp_offset;
+ if (ppkt->tcp_seq == ppkt->seq_end) {
+ colo_release_primary_pkt(s, ppkt);
+ ppkt = NULL;
+ }
- ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
- + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
- stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
- + (stcp->th_off * 4) - spkt->vnet_hdr_len;
+ if (ppkt && conn->compare_seq && !after(ppkt->seq_end, conn->compare_seq)) {
+ trace_colo_compare_main("pri: this packet has compared");
+ colo_release_primary_pkt(s, ppkt);
+ ppkt = NULL;
+ }
- /*
- * When network is busy, some tcp options(like sack) will unpredictable
- * occur in primary side or secondary side. it will make packet size
- * not same, but the two packet's payload is identical. colo just
- * care about packet payload, so we skip the option field.
- */
- res = colo_packet_compare_common(ppkt, spkt, ptcp_offset, stcp_offset);
- } else if (ptcp->th_sum == stcp->th_sum) {
- res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN, ETH_HLEN);
+ if (spkt->tcp_seq == spkt->seq_end) {
+ packet_destroy(spkt, NULL);
+ if (!ppkt) {
+ goto pri;
+ } else {
+ goto sec;
+ }
} else {
- res = -1;
+ if (conn->compare_seq && !after(spkt->seq_end, conn->compare_seq)) {
+ trace_colo_compare_main("sec: this packet has compared");
+ packet_destroy(spkt, NULL);
+ if (!ppkt) {
+ goto pri;
+ } else {
+ goto sec;
+ }
+ }
+ if (!ppkt) {
+ g_queue_push_head(&conn->secondary_list, spkt);
+ goto pri;
+ }
}
- if (res != 0 &&
- trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
- char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
-
- strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
- strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
- strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
- strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
-
- trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
- pri_ip_dst, spkt->size,
- sec_ip_src, sec_ip_dst);
-
- trace_colo_compare_tcp_info("pri tcp packet",
- ntohl(ptcp->th_seq),
- ntohl(ptcp->th_ack),
- res, ptcp->th_flags,
- ppkt->size);
-
- trace_colo_compare_tcp_info("sec tcp packet",
- ntohl(stcp->th_seq),
- ntohl(stcp->th_ack),
- res, stcp->th_flags,
- spkt->size);
+ if (colo_mark_tcp_pkt(ppkt, spkt, &mark, min_ack)) {
+ trace_colo_compare_tcp_info("pri",
+ ppkt->tcp_seq, ppkt->tcp_ack,
+ ppkt->header_size, ppkt->payload_size,
+ ppkt->offset, ppkt->flags);
+
+ trace_colo_compare_tcp_info("sec",
+ spkt->tcp_seq, spkt->tcp_ack,
+ spkt->header_size, spkt->payload_size,
+ spkt->offset, spkt->flags);
+
+ if (mark == COLO_COMPARE_FREE_PRIMARY) {
+ conn->compare_seq = ppkt->seq_end;
+ colo_release_primary_pkt(s, ppkt);
+ g_queue_push_head(&conn->secondary_list, spkt);
+ goto pri;
+ }
+ if (mark == COLO_COMPARE_FREE_SECONDARY) {
+ conn->compare_seq = spkt->seq_end;
+ packet_destroy(spkt, NULL);
+ goto sec;
+ }
+ if (mark == (COLO_COMPARE_FREE_PRIMARY | COLO_COMPARE_FREE_SECONDARY)) {
+ conn->compare_seq = ppkt->seq_end;
+ colo_release_primary_pkt(s, ppkt);
+ packet_destroy(spkt, NULL);
+ goto pri;
+ }
+ } else {
+ g_queue_push_head(&conn->primary_list, ppkt);
+ g_queue_push_head(&conn->secondary_list, spkt);
qemu_hexdump((char *)ppkt->data, stderr,
"colo-compare ppkt", ppkt->size);
qemu_hexdump((char *)spkt->data, stderr,
"colo-compare spkt", spkt->size);
- }
- return res;
+ /*
+ * colo_compare_inconsistent_notify();
+ * TODO: notice to checkpoint();
+ */
+ }
}
+
/*
* Called from the compare thread on the primary
* for compare udp packet
*/
static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
{
- int ret;
- int network_header_length = ppkt->ip->ip_hl * 4;
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
trace_colo_compare_main("compare udp");
@@ -346,11 +444,12 @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
* other field like TOS,TTL,IP Checksum. we only need to compare
* the ip payload here.
*/
- ret = colo_packet_compare_common(ppkt, spkt,
- network_header_length + ETH_HLEN,
- network_header_length + ETH_HLEN);
-
- if (ret) {
+ if (ppkt->size != spkt->size) {
+ trace_colo_compare_main("UDP: payload size of packets are different");
+ return -1;
+ }
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
+ ppkt->size - offset)) {
trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
@@ -359,9 +458,10 @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt",
spkt->size);
}
+ return -1;
+ } else {
+ return 0;
}
-
- return ret;
}
/*
@@ -370,7 +470,8 @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
*/
static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
{
- int network_header_length = ppkt->ip->ip_hl * 4;
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
trace_colo_compare_main("compare icmp");
@@ -384,9 +485,12 @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
* other field like TOS,TTL,IP Checksum. we only need to compare
* the ip payload here.
*/
- if (colo_packet_compare_common(ppkt, spkt,
- network_header_length + ETH_HLEN,
- network_header_length + ETH_HLEN)) {
+ if (ppkt->size != spkt->size) {
+ trace_colo_compare_main("ICMP: payload size of packets are different");
+ return -1;
+ }
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
+ ppkt->size - offset)) {
trace_colo_compare_icmp_miscompare("primary pkt size",
ppkt->size);
trace_colo_compare_icmp_miscompare("Secondary pkt size",
@@ -409,6 +513,8 @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
*/
static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
{
+ uint16_t offset = ppkt->vnet_hdr_len;
+
trace_colo_compare_main("compare other");
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
@@ -423,7 +529,12 @@ static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
sec_ip_src, sec_ip_dst);
}
- return colo_packet_compare_common(ppkt, spkt, 0, 0);
+ if (ppkt->size != spkt->size) {
+ trace_colo_compare_main("Other: payload size of packets are different");
+ return -1;
+ }
+ return colo_compare_packet_payload(ppkt, spkt, offset, offset,
+ ppkt->size - offset);
}
static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
@@ -477,53 +588,22 @@ static void colo_old_packet_check(void *opaque)
(GCompareFunc)colo_old_packet_check_one_conn);
}
-/*
- * Called from the compare thread on the primary
- * for compare packet with secondary list of the
- * specified connection when a new packet was
- * queued to it.
- */
-static void colo_compare_connection(void *opaque, void *user_data)
+static void colo_compare_packet(CompareState *s, Connection *conn,
+ int (*HandlePacket)(Packet *spkt,
+ Packet *ppkt))
{
- CompareState *s = user_data;
- Connection *conn = opaque;
Packet *pkt = NULL;
GList *result = NULL;
- int ret;
while (!g_queue_is_empty(&conn->primary_list) &&
!g_queue_is_empty(&conn->secondary_list)) {
pkt = g_queue_pop_head(&conn->primary_list);
- switch (conn->ip_proto) {
- case IPPROTO_TCP:
- result = g_queue_find_custom(&conn->secondary_list,
- pkt, (GCompareFunc)colo_packet_compare_tcp);
- break;
- case IPPROTO_UDP:
- result = g_queue_find_custom(&conn->secondary_list,
- pkt, (GCompareFunc)colo_packet_compare_udp);
- break;
- case IPPROTO_ICMP:
- result = g_queue_find_custom(&conn->secondary_list,
- pkt, (GCompareFunc)colo_packet_compare_icmp);
- break;
- default:
- result = g_queue_find_custom(&conn->secondary_list,
- pkt, (GCompareFunc)colo_packet_compare_other);
- break;
- }
+ result = g_queue_find_custom(&conn->secondary_list,
+ pkt, (GCompareFunc)HandlePacket);
if (result) {
- ret = compare_chr_send(s,
- pkt->data,
- pkt->size,
- pkt->vnet_hdr_len);
- if (ret < 0) {
- error_report("colo_send_primary_packet failed");
- }
- trace_colo_compare_main("packet same and release packet");
+ colo_release_primary_pkt(s, pkt);
g_queue_remove(&conn->secondary_list, result->data);
- packet_destroy(pkt, NULL);
} else {
/*
* If one packet arrive late, the secondary_list or
@@ -538,6 +618,33 @@ static void colo_compare_connection(void *opaque, void *user_data)
}
}
+/*
+ * Called from the compare thread on the primary
+ * for compare packet with secondary list of the
+ * specified connection when a new packet was
+ * queued to it.
+ */
+static void colo_compare_connection(void *opaque, void *user_data)
+{
+ CompareState *s = user_data;
+ Connection *conn = opaque;
+
+ switch (conn->ip_proto) {
+ case IPPROTO_TCP:
+ colo_compare_tcp(s, conn);
+ break;
+ case IPPROTO_UDP:
+ colo_compare_packet(s, conn, colo_packet_compare_udp);
+ break;
+ case IPPROTO_ICMP:
+ colo_compare_packet(s, conn, colo_packet_compare_icmp);
+ break;
+ default:
+ colo_compare_packet(s, conn, colo_packet_compare_other);
+ break;
+ }
+}
+
static int compare_chr_send(CompareState *s,
const uint8_t *buf,
uint32_t size,
diff --git a/net/colo.c b/net/colo.c
index a39d600f34..842626502e 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -138,6 +138,8 @@ Connection *connection_new(ConnectionKey *key)
conn->processing = false;
conn->offset = 0;
conn->syn_flag = 0;
+ conn->pack = 0;
+ conn->sack = 0;
g_queue_init(&conn->primary_list);
g_queue_init(&conn->secondary_list);
@@ -163,6 +165,13 @@ Packet *packet_new(const void *data, int size, int vnet_hdr_len)
pkt->size = size;
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
pkt->vnet_hdr_len = vnet_hdr_len;
+ pkt->tcp_seq = 0;
+ pkt->tcp_ack = 0;
+ pkt->seq_end = 0;
+ pkt->header_size = 0;
+ pkt->payload_size = 0;
+ pkt->offset = 0;
+ pkt->flags = 0;
return pkt;
}
diff --git a/net/colo.h b/net/colo.h
index 0658e869b4..da6c36dcf7 100644
--- a/net/colo.h
+++ b/net/colo.h
@@ -45,6 +45,15 @@ typedef struct Packet {
int64_t creation_ms;
/* Get vnet_hdr_len from filter */
uint32_t vnet_hdr_len;
+ uint32_t tcp_seq; /* sequence number */
+ uint32_t tcp_ack; /* acknowledgement number */
+ /* sequence number just past the last payload byte (tcp_seq + payload_size) */
+ uint32_t seq_end;
+ uint8_t header_size; /* the header length */
+ uint16_t payload_size; /* the payload length */
+ /* record the payload offset(the length that has been compared) */
+ uint16_t offset;
+ uint8_t flags; /* Flags(aka Control bits) */
} Packet;
typedef struct ConnectionKey {
@@ -64,6 +73,12 @@ typedef struct Connection {
/* flag to enqueue unprocessed_connections */
bool processing;
uint8_t ip_proto;
+ /* record the sequence number that has been compared */
+ uint32_t compare_seq;
+ /* the maximum of acknowledgement number in primary_list queue */
+ uint32_t pack;
+ /* the maximum of acknowledgement number in secondary_list queue */
+ uint32_t sack;
/* offset = secondary_seq - primary_seq */
tcp_seq offset;
/*
diff --git a/net/hub.c b/net/hub.c
index 14b4eec68f..5e84a9ad93 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -13,6 +13,7 @@
*/
#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "monitor/monitor.h"
#include "net/net.h"
#include "clients.h"
@@ -140,7 +141,8 @@ static NetClientInfo net_hub_port_info = {
.cleanup = net_hub_port_cleanup,
};
-static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
+static NetHubPort *net_hub_port_new(NetHub *hub, const char *name,
+ NetClientState *hubpeer)
{
NetClientState *nc;
NetHubPort *port;
@@ -153,7 +155,7 @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
name = default_name;
}
- nc = qemu_new_net_client(&net_hub_port_info, NULL, "hub", name);
+ nc = qemu_new_net_client(&net_hub_port_info, hubpeer, "hub", name);
port = DO_UPCAST(NetHubPort, nc, nc);
port->id = id;
port->hub = hub;
@@ -165,11 +167,14 @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
/**
* Create a port on a given hub
+ * @hub_id: Number of the hub
* @name: Net client name or NULL for default name.
+ * @hubpeer: Peer to use (if "netdev=id" has been specified)
*
* If there is no existing hub with the given id then a new hub is created.
*/
-NetClientState *net_hub_add_port(int hub_id, const char *name)
+NetClientState *net_hub_add_port(int hub_id, const char *name,
+ NetClientState *hubpeer)
{
NetHub *hub;
NetHubPort *port;
@@ -184,7 +189,7 @@ NetClientState *net_hub_add_port(int hub_id, const char *name)
hub = net_hub_new(hub_id);
}
- port = net_hub_port_new(hub, name);
+ port = net_hub_port_new(hub, name, hubpeer);
return &port->nc;
}
@@ -232,7 +237,7 @@ NetClientState *net_hub_port_find(int hub_id)
}
}
- nc = net_hub_add_port(hub_id, NULL);
+ nc = net_hub_add_port(hub_id, NULL, NULL);
return nc;
}
@@ -286,12 +291,22 @@ int net_init_hubport(const Netdev *netdev, const char *name,
NetClientState *peer, Error **errp)
{
const NetdevHubPortOptions *hubport;
+ NetClientState *hubpeer = NULL;
assert(netdev->type == NET_CLIENT_DRIVER_HUBPORT);
assert(!peer);
hubport = &netdev->u.hubport;
- net_hub_add_port(hubport->hubid, name);
+ if (hubport->has_netdev) {
+ hubpeer = qemu_find_netdev(hubport->netdev);
+ if (!hubpeer) {
+ error_setg(errp, "netdev '%s' not found", hubport->netdev);
+ return -1;
+ }
+ }
+
+ net_hub_add_port(hubport->hubid, name, hubpeer);
+
return 0;
}
diff --git a/net/hub.h b/net/hub.h
index a625effe00..6a16f0487a 100644
--- a/net/hub.h
+++ b/net/hub.h
@@ -17,7 +17,8 @@
#include "qemu-common.h"
-NetClientState *net_hub_add_port(int hub_id, const char *name);
+NetClientState *net_hub_add_port(int hub_id, const char *name,
+ NetClientState *hubpeer);
NetClientState *net_hub_find_client_by_name(int hub_id, const char *name);
void net_hub_info(Monitor *mon);
void net_hub_check_clients(void);
diff --git a/net/net.c b/net/net.c
index 2b81c93193..e1569e7d89 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1063,7 +1063,7 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp)
/* Do not add to a vlan if it's a nic with a netdev= parameter. */
if (netdev->type != NET_CLIENT_DRIVER_NIC ||
!opts->u.nic.has_netdev) {
- peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL);
+ peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL, NULL);
}
if (net->has_vlan && !vlan_warned) {
diff --git a/net/slirp.c b/net/slirp.c
index cb8ca2312f..7044d292c8 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -405,16 +405,23 @@ error:
return -1;
}
-static SlirpState *slirp_lookup(Monitor *mon, const char *vlan,
- const char *stack)
+static SlirpState *slirp_lookup(Monitor *mon, const char *hub_id,
+ const char *name)
{
-
- if (vlan) {
+ if (name) {
NetClientState *nc;
- nc = net_hub_find_client_by_name(strtol(vlan, NULL, 0), stack);
- if (!nc) {
- monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
- return NULL;
+ if (hub_id) {
+ nc = net_hub_find_client_by_name(strtol(hub_id, NULL, 0), name);
+ if (!nc) {
+ monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
+ return NULL;
+ }
+ } else {
+ nc = qemu_find_netdev(name);
+ if (!nc) {
+ monitor_printf(mon, "unrecognized netdev id '%s'\n", name);
+ return NULL;
+ }
}
if (strcmp(nc->model, "user")) {
monitor_printf(mon, "invalid device specified\n");
@@ -443,9 +450,12 @@ void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict)
const char *arg2 = qdict_get_try_str(qdict, "arg2");
const char *arg3 = qdict_get_try_str(qdict, "arg3");
- if (arg2) {
+ if (arg3) {
s = slirp_lookup(mon, arg1, arg2);
src_str = arg3;
+ } else if (arg2) {
+ s = slirp_lookup(mon, NULL, arg1);
+ src_str = arg2;
} else {
s = slirp_lookup(mon, NULL, NULL);
src_str = arg1;
@@ -570,9 +580,12 @@ void hmp_hostfwd_add(Monitor *mon, const QDict *qdict)
const char *arg2 = qdict_get_try_str(qdict, "arg2");
const char *arg3 = qdict_get_try_str(qdict, "arg3");
- if (arg2) {
+ if (arg3) {
s = slirp_lookup(mon, arg1, arg2);
redir_str = arg3;
+ } else if (arg2) {
+ s = slirp_lookup(mon, NULL, arg1);
+ redir_str = arg2;
} else {
s = slirp_lookup(mon, NULL, NULL);
redir_str = arg1;
diff --git a/net/trace-events b/net/trace-events
index 938263dd7a..7b594cfdd2 100644
--- a/net/trace-events
+++ b/net/trace-events
@@ -13,7 +13,7 @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d"
colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
colo_old_packet_check_found(int64_t old_time) "%" PRId64
colo_compare_miscompare(void) ""
-colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int res, uint32_t flag, int size) "side: %s seq/ack= %u/%u res= %d flags= 0x%x pkt_size: %d\n"
+colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int hdlen, int pdlen, int offset, int flags) "%s: seq/ack= %u/%u hdlen= %d pdlen= %d offset= %d flags=%d\n"
# net/filter-rewriter.c
colo_filter_rewriter_debug(void) ""
diff --git a/qapi/net.json b/qapi/net.json
index 4beff5d582..1238ba5de1 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -410,12 +410,14 @@
# Connect two or more net clients through a software hub.
#
# @hubid: hub identifier number
+# @netdev: used to connect hub to a netdev instead of a device (since 2.12)
#
# Since: 1.2
##
{ 'struct': 'NetdevHubPortOptions',
'data': {
- 'hubid': 'int32' } }
+ 'hubid': 'int32',
+ '*netdev': 'str' } }
##
# @NetdevNetmapOptions:
diff --git a/qemu-options.hx b/qemu-options.hx
index 1d73fb151d..8ce427da78 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2000,7 +2000,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
#endif
"-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
" configure a vhost-user network, backed by a chardev 'dev'\n"
- "-netdev hubport,id=str,hubid=n\n"
+ "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
" configure a hub port on QEMU VLAN 'n'\n", QEMU_ARCH_ALL)
DEF("net", HAS_ARG, QEMU_OPTION_net,
"-net nic[,vlan=n][,netdev=nd][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n"
@@ -2242,8 +2242,8 @@ qemu-system-i386 linux.img -net nic -net tap
#launch a QEMU instance with two NICs, each one connected
#to a TAP device
qemu-system-i386 linux.img \
- -net nic,vlan=0 -net tap,vlan=0,ifname=tap0 \
- -net nic,vlan=1 -net tap,vlan=1,ifname=tap1
+ -netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \
+ -netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1
@end example
@example
@@ -2428,13 +2428,15 @@ vde_switch -F -sock /tmp/myswitch
qemu-system-i386 linux.img -net nic -net vde,sock=/tmp/myswitch
@end example
-@item -netdev hubport,id=@var{id},hubid=@var{hubid}
+@item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
Create a hub port on QEMU "vlan" @var{hubid}.
The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the
-required hub automatically.
+required hub automatically. Alternatively, you can also connect the hubport
+to another netdev with ID @var{nd} by using the @option{netdev=@var{nd}}
+option.
@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]