blob: ca67c68615f455622f8a377d4417492f758e46a1 [file] [log] [blame]
Zhang Chen7dce4e62016-09-27 10:22:26 +08001/*
2 * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
3 * (a.k.a. Fault Tolerance or Continuous Replication)
4 *
5 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
6 * Copyright (c) 2016 FUJITSU LIMITED
7 * Copyright (c) 2016 Intel Corporation
8 *
9 * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2 or
12 * later. See the COPYING file in the top-level directory.
13 */
14
15#include "qemu/osdep.h"
16#include "qemu/error-report.h"
Zhang Chen59509ec2016-09-27 10:22:27 +080017#include "trace.h"
Zhang Chen7dce4e62016-09-27 10:22:26 +080018#include "qemu-common.h"
19#include "qapi/qmp/qerror.h"
20#include "qapi/error.h"
21#include "net/net.h"
Zhang Chenf4b61832016-09-27 10:22:31 +080022#include "net/eth.h"
Zhang Chen7dce4e62016-09-27 10:22:26 +080023#include "qom/object_interfaces.h"
24#include "qemu/iov.h"
25#include "qom/object.h"
26#include "qemu/typedefs.h"
27#include "net/queue.h"
Marc-André Lureau4d43a602017-01-26 18:26:44 +040028#include "chardev/char-fe.h"
Zhang Chen7dce4e62016-09-27 10:22:26 +080029#include "qemu/sockets.h"
30#include "qapi-visit.h"
Zhang Chen59509ec2016-09-27 10:22:27 +080031#include "net/colo.h"
Zhang Chen7dce4e62016-09-27 10:22:26 +080032
/* QOM type name under which this object type is registered. */
#define TYPE_COLO_COMPARE "colo-compare"

/* Convert a generic Object pointer into a CompareState pointer. */
#define COLO_COMPARE(obj) \
    OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)

/* Value reported by compare_chr_can_read() to the input chardevs. */
#define COMPARE_READ_LEN_MAX NET_BUFSIZE

/* Queue length threshold checked by packet_enqueue() before queuing. */
#define MAX_QUEUE_SIZE 1024

/*
 * Used both as the period of the old-packet timer and as the age (in ms)
 * after which a primary packet counts as old.
 * TODO: Should be configurable
 */
#define REGULAR_PACKET_CHECK_MS 3000
42
Zhang Chen59509ec2016-09-27 10:22:27 +080043/*
44 + CompareState ++
45 | |
46 +---------------+ +---------------+ +---------------+
47 |conn list +--->conn +--------->conn |
48 +---------------+ +---------------+ +---------------+
49 | | | | | |
50 +---------------+ +---v----+ +---v----+ +---v----+ +---v----+
51 |primary | |secondary |primary | |secondary
52 |packet | |packet + |packet | |packet +
53 +--------+ +--------+ +--------+ +--------+
54 | | | |
55 +---v----+ +---v----+ +---v----+ +---v----+
56 |primary | |secondary |primary | |secondary
57 |packet | |packet + |packet | |packet +
58 +--------+ +--------+ +--------+ +--------+
59 | | | |
60 +---v----+ +---v----+ +---v----+ +---v----+
61 |primary | |secondary |primary | |secondary
62 |packet | |packet + |packet | |packet +
63 +--------+ +--------+ +--------+ +--------+
64*/
Zhang Chen7dce4e62016-09-27 10:22:26 +080065typedef struct CompareState {
66 Object parent;
67
68 char *pri_indev;
69 char *sec_indev;
70 char *outdev;
Marc-André Lureau32a6ebe2016-10-22 12:52:52 +030071 CharBackend chr_pri_in;
72 CharBackend chr_sec_in;
73 CharBackend chr_out;
Zhang Chen7dce4e62016-09-27 10:22:26 +080074 SocketReadState pri_rs;
75 SocketReadState sec_rs;
Zhang Chenaa3a7032017-07-04 14:53:52 +080076 bool vnet_hdr;
Zhang Chen59509ec2016-09-27 10:22:27 +080077
Zhang Chenb6540d42016-09-27 10:22:29 +080078 /* connection list: the connections belonged to this NIC could be found
79 * in this list.
80 * element type: Connection
81 */
82 GQueue conn_list;
Zhang Chen59509ec2016-09-27 10:22:27 +080083 /* hashtable to save connection */
84 GHashTable *connection_track_table;
Zhang Chen0682e152016-09-27 10:22:30 +080085 /* compare thread, a thread for each NIC */
86 QemuThread thread;
zhanghailiangdfd917a2017-02-17 10:53:12 +080087
zhanghailiangb43decb2017-02-17 10:53:14 +080088 GMainContext *worker_context;
zhanghailiangdfd917a2017-02-17 10:53:12 +080089 GMainLoop *compare_loop;
Zhang Chen7dce4e62016-09-27 10:22:26 +080090} CompareState;
91
92typedef struct CompareClass {
93 ObjectClass parent_class;
94} CompareClass;
95
/* Selects which input stream packet_enqueue() reads its packet from. */
enum {
    PRIMARY_IN = 0,
    SECONDARY_IN = 1,
};
100
Zhang Chen3037e7a2017-07-04 14:53:51 +0800101static int compare_chr_send(CompareState *s,
Zhang Chen59509ec2016-09-27 10:22:27 +0800102 const uint8_t *buf,
Zhang Chenaa3a7032017-07-04 14:53:52 +0800103 uint32_t size,
104 uint32_t vnet_hdr_len);
Zhang Chen59509ec2016-09-27 10:22:27 +0800105
Zhang Chena935cc32017-01-24 16:53:46 +0800106static gint seq_sorter(Packet *a, Packet *b, gpointer data)
107{
108 struct tcphdr *atcp, *btcp;
109
110 atcp = (struct tcphdr *)(a->transport_header);
111 btcp = (struct tcphdr *)(b->transport_header);
112 return ntohl(atcp->th_seq) - ntohl(btcp->th_seq);
113}
114
Zhang Chen59509ec2016-09-27 10:22:27 +0800115/*
116 * Return 0 on success, if return -1 means the pkt
117 * is unsupported(arp and ipv6) and will be sent later
118 */
119static int packet_enqueue(CompareState *s, int mode)
120{
Zhang Chenb6540d42016-09-27 10:22:29 +0800121 ConnectionKey key;
Zhang Chen59509ec2016-09-27 10:22:27 +0800122 Packet *pkt = NULL;
Zhang Chenb6540d42016-09-27 10:22:29 +0800123 Connection *conn;
Zhang Chen59509ec2016-09-27 10:22:27 +0800124
125 if (mode == PRIMARY_IN) {
Zhang Chenada1a332017-07-04 14:53:50 +0800126 pkt = packet_new(s->pri_rs.buf,
127 s->pri_rs.packet_len,
128 s->pri_rs.vnet_hdr_len);
Zhang Chen59509ec2016-09-27 10:22:27 +0800129 } else {
Zhang Chenada1a332017-07-04 14:53:50 +0800130 pkt = packet_new(s->sec_rs.buf,
131 s->sec_rs.packet_len,
132 s->sec_rs.vnet_hdr_len);
Zhang Chen59509ec2016-09-27 10:22:27 +0800133 }
134
135 if (parse_packet_early(pkt)) {
136 packet_destroy(pkt, NULL);
137 pkt = NULL;
138 return -1;
139 }
Zhang Chenb6540d42016-09-27 10:22:29 +0800140 fill_connection_key(pkt, &key);
Zhang Chen59509ec2016-09-27 10:22:27 +0800141
Zhang Chenb6540d42016-09-27 10:22:29 +0800142 conn = connection_get(s->connection_track_table,
143 &key,
144 &s->conn_list);
Zhang Chen59509ec2016-09-27 10:22:27 +0800145
Zhang Chenb6540d42016-09-27 10:22:29 +0800146 if (!conn->processing) {
147 g_queue_push_tail(&s->conn_list, conn);
148 conn->processing = true;
149 }
150
151 if (mode == PRIMARY_IN) {
152 if (g_queue_get_length(&conn->primary_list) <=
153 MAX_QUEUE_SIZE) {
154 g_queue_push_tail(&conn->primary_list, pkt);
Zhang Chena935cc32017-01-24 16:53:46 +0800155 if (conn->ip_proto == IPPROTO_TCP) {
156 g_queue_sort(&conn->primary_list,
157 (GCompareDataFunc)seq_sorter,
158 NULL);
159 }
Zhang Chenb6540d42016-09-27 10:22:29 +0800160 } else {
161 error_report("colo compare primary queue size too big,"
162 "drop packet");
163 }
164 } else {
165 if (g_queue_get_length(&conn->secondary_list) <=
166 MAX_QUEUE_SIZE) {
167 g_queue_push_tail(&conn->secondary_list, pkt);
Zhang Chena935cc32017-01-24 16:53:46 +0800168 if (conn->ip_proto == IPPROTO_TCP) {
169 g_queue_sort(&conn->secondary_list,
170 (GCompareDataFunc)seq_sorter,
171 NULL);
172 }
Zhang Chenb6540d42016-09-27 10:22:29 +0800173 } else {
174 error_report("colo compare secondary queue size too big,"
175 "drop packet");
176 }
177 }
Zhang Chen59509ec2016-09-27 10:22:27 +0800178
179 return 0;
180}
181
Zhang Chen0682e152016-09-27 10:22:30 +0800182/*
183 * The IP packets sent by primary and secondary
184 * will be compared in here
185 * TODO support ip fragment, Out-Of-Order
186 * return: 0 means packet same
187 * > 0 || < 0 means packet different
188 */
Zhang Chen6efeb322017-03-02 17:54:17 +0800189static int colo_packet_compare_common(Packet *ppkt, Packet *spkt, int offset)
Zhang Chen0682e152016-09-27 10:22:30 +0800190{
Zhang Chene630b2b2017-03-09 15:40:07 +0800191 if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
192 char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
193
194 strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
195 strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
196 strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
197 strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
198
199 trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
200 pri_ip_dst, spkt->size,
201 sec_ip_src, sec_ip_dst);
202 }
Zhang Chen0682e152016-09-27 10:22:30 +0800203
Zhang Chend63b3662017-07-04 14:53:54 +0800204 offset = ppkt->vnet_hdr_len + offset;
205
Zhang Chen0682e152016-09-27 10:22:30 +0800206 if (ppkt->size == spkt->size) {
Zhang Chend63b3662017-07-04 14:53:54 +0800207 return memcmp(ppkt->data + offset,
208 spkt->data + offset,
Zhang Chen6efeb322017-03-02 17:54:17 +0800209 spkt->size - offset);
Zhang Chen0682e152016-09-27 10:22:30 +0800210 } else {
Zhang Chen2ad7ca42017-03-02 17:54:16 +0800211 trace_colo_compare_main("Net packet size are not the same");
Zhang Chen0682e152016-09-27 10:22:30 +0800212 return -1;
213 }
214}
215
Zhang Chenf4b61832016-09-27 10:22:31 +0800216/*
217 * Called from the compare thread on the primary
218 * for compare tcp packet
219 * compare_tcp copied from Dr. David Alan Gilbert's branch
220 */
221static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
Zhang Chen0682e152016-09-27 10:22:30 +0800222{
Zhang Chenf4b61832016-09-27 10:22:31 +0800223 struct tcphdr *ptcp, *stcp;
224 int res;
Zhang Chenf4b61832016-09-27 10:22:31 +0800225
226 trace_colo_compare_main("compare tcp");
Zhang Chen2ad7ca42017-03-02 17:54:16 +0800227
Zhang Chenf4b61832016-09-27 10:22:31 +0800228 ptcp = (struct tcphdr *)ppkt->transport_header;
229 stcp = (struct tcphdr *)spkt->transport_header;
230
231 /*
232 * The 'identification' field in the IP header is *very* random
233 * it almost never matches. Fudge this by ignoring differences in
234 * unfragmented packets; they'll normally sort themselves out if different
235 * anyway, and it should recover at the TCP level.
236 * An alternative would be to get both the primary and secondary to rewrite
237 * somehow; but that would need some sync traffic to sync the state
238 */
239 if (ntohs(ppkt->ip->ip_off) & IP_DF) {
240 spkt->ip->ip_id = ppkt->ip->ip_id;
241 /* and the sum will be different if the IDs were different */
242 spkt->ip->ip_sum = ppkt->ip->ip_sum;
243 }
244
Zhang Chen184d4d42017-04-18 10:20:19 +0800245 /*
246 * Check tcp header length for tcp option field.
247 * th_off > 5 means this tcp packet have options field.
248 * The tcp options maybe always different.
249 * for example:
250 * From RFC 7323.
251 * TCP Timestamps option (TSopt):
252 * Kind: 8
253 *
254 * Length: 10 bytes
255 *
256 * +-------+-------+---------------------+---------------------+
257 * |Kind=8 | 10 | TS Value (TSval) |TS Echo Reply (TSecr)|
258 * +-------+-------+---------------------+---------------------+
259 * 1 1 4 4
260 *
261 * In this case the primary guest's timestamp always different with
262 * the secondary guest's timestamp. COLO just focus on payload,
263 * so we just need skip this field.
264 */
265 if (ptcp->th_off > 5) {
266 ptrdiff_t tcp_offset;
Zhang Chend63b3662017-07-04 14:53:54 +0800267
Zhang Chen184d4d42017-04-18 10:20:19 +0800268 tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
Zhang Chend63b3662017-07-04 14:53:54 +0800269 + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
Zhang Chen184d4d42017-04-18 10:20:19 +0800270 res = colo_packet_compare_common(ppkt, spkt, tcp_offset);
271 } else if (ptcp->th_sum == stcp->th_sum) {
Zhang Chen6efeb322017-03-02 17:54:17 +0800272 res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN);
273 } else {
274 res = -1;
275 }
Zhang Chenf4b61832016-09-27 10:22:31 +0800276
Peter Maydell51b9d492017-04-26 16:19:27 +0100277 if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
Zhang Chenf583dca2017-04-27 11:46:45 +0800278 char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
Alex Bennée2dfe5112016-10-28 14:25:59 +0100279
Zhang Chenf583dca2017-04-27 11:46:45 +0800280 strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
281 strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
282 strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
283 strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
284
285 trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
286 pri_ip_dst, spkt->size,
287 sec_ip_src, sec_ip_dst);
288
289 trace_colo_compare_tcp_info("pri tcp packet",
290 ntohl(ptcp->th_seq),
291 ntohl(ptcp->th_ack),
292 res, ptcp->th_flags,
293 ppkt->size);
294
295 trace_colo_compare_tcp_info("sec tcp packet",
296 ntohl(stcp->th_seq),
297 ntohl(stcp->th_ack),
298 res, stcp->th_flags,
299 spkt->size);
Zhang Chenf4b61832016-09-27 10:22:31 +0800300
Zhang Chen2061c142016-10-17 17:23:59 +0800301 qemu_hexdump((char *)ppkt->data, stderr,
302 "colo-compare ppkt", ppkt->size);
303 qemu_hexdump((char *)spkt->data, stderr,
304 "colo-compare spkt", spkt->size);
Zhang Chenf4b61832016-09-27 10:22:31 +0800305 }
306
307 return res;
308}
309
310/*
311 * Called from the compare thread on the primary
312 * for compare udp packet
313 */
314static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
315{
316 int ret;
Zhang Chen6efeb322017-03-02 17:54:17 +0800317 int network_header_length = ppkt->ip->ip_hl * 4;
Zhang Chenf4b61832016-09-27 10:22:31 +0800318
319 trace_colo_compare_main("compare udp");
Zhang Chen2ad7ca42017-03-02 17:54:16 +0800320
Zhang Chen6efeb322017-03-02 17:54:17 +0800321 /*
322 * Because of ppkt and spkt are both in the same connection,
323 * The ppkt's src ip, dst ip, src port, dst port, ip_proto all are
324 * same with spkt. In addition, IP header's Identification is a random
325 * field, we can handle it in IP fragmentation function later.
326 * COLO just concern the response net packet payload from primary guest
327 * and secondary guest are same or not, So we ignored all IP header include
328 * other field like TOS,TTL,IP Checksum. we only need to compare
329 * the ip payload here.
330 */
331 ret = colo_packet_compare_common(ppkt, spkt,
332 network_header_length + ETH_HLEN);
Zhang Chenf4b61832016-09-27 10:22:31 +0800333
334 if (ret) {
335 trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
Zhang Chenf4b61832016-09-27 10:22:31 +0800336 trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
Zhang Chen1723a7f2017-03-02 17:54:18 +0800337 if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
338 qemu_hexdump((char *)ppkt->data, stderr, "colo-compare pri pkt",
339 ppkt->size);
340 qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt",
341 spkt->size);
342 }
Zhang Chenf4b61832016-09-27 10:22:31 +0800343 }
344
345 return ret;
346}
347
348/*
349 * Called from the compare thread on the primary
350 * for compare icmp packet
351 */
352static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
353{
Zhang Chen6efeb322017-03-02 17:54:17 +0800354 int network_header_length = ppkt->ip->ip_hl * 4;
355
Zhang Chenf4b61832016-09-27 10:22:31 +0800356 trace_colo_compare_main("compare icmp");
Zhang Chenf4b61832016-09-27 10:22:31 +0800357
Zhang Chen6efeb322017-03-02 17:54:17 +0800358 /*
359 * Because of ppkt and spkt are both in the same connection,
360 * The ppkt's src ip, dst ip, src port, dst port, ip_proto all are
361 * same with spkt. In addition, IP header's Identification is a random
362 * field, we can handle it in IP fragmentation function later.
363 * COLO just concern the response net packet payload from primary guest
364 * and secondary guest are same or not, So we ignored all IP header include
365 * other field like TOS,TTL,IP Checksum. we only need to compare
366 * the ip payload here.
367 */
368 if (colo_packet_compare_common(ppkt, spkt,
369 network_header_length + ETH_HLEN)) {
Zhang Chenf4b61832016-09-27 10:22:31 +0800370 trace_colo_compare_icmp_miscompare("primary pkt size",
371 ppkt->size);
Zhang Chenf4b61832016-09-27 10:22:31 +0800372 trace_colo_compare_icmp_miscompare("Secondary pkt size",
373 spkt->size);
Zhang Chen1723a7f2017-03-02 17:54:18 +0800374 if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
375 qemu_hexdump((char *)ppkt->data, stderr, "colo-compare pri pkt",
376 ppkt->size);
377 qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt",
378 spkt->size);
379 }
Zhang Chenf4b61832016-09-27 10:22:31 +0800380 return -1;
381 } else {
382 return 0;
383 }
384}
385
386/*
387 * Called from the compare thread on the primary
388 * for compare other packet
389 */
390static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
391{
392 trace_colo_compare_main("compare other");
Zhang Chene630b2b2017-03-09 15:40:07 +0800393 if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
394 char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
395
396 strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
397 strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
398 strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
399 strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
400
401 trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
402 pri_ip_dst, spkt->size,
403 sec_ip_src, sec_ip_dst);
404 }
405
Zhang Chen6efeb322017-03-02 17:54:17 +0800406 return colo_packet_compare_common(ppkt, spkt, 0);
Zhang Chen0682e152016-09-27 10:22:30 +0800407}
408
409static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
410{
411 int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST);
412
413 if ((now - pkt->creation_ms) > (*check_time)) {
414 trace_colo_old_packet_check_found(pkt->creation_ms);
415 return 0;
416 } else {
417 return 1;
418 }
419}
420
Zhang Chend25a7da2017-04-12 20:12:07 +0800421static int colo_old_packet_check_one_conn(Connection *conn,
422 void *user_data)
Zhang Chen0682e152016-09-27 10:22:30 +0800423{
Zhang Chen0682e152016-09-27 10:22:30 +0800424 GList *result = NULL;
425 int64_t check_time = REGULAR_PACKET_CHECK_MS;
426
427 result = g_queue_find_custom(&conn->primary_list,
428 &check_time,
429 (GCompareFunc)colo_old_packet_check_one);
430
431 if (result) {
432 /* do checkpoint will flush old packet */
433 /* TODO: colo_notify_checkpoint();*/
Zhang Chend25a7da2017-04-12 20:12:07 +0800434 return 0;
Zhang Chen0682e152016-09-27 10:22:30 +0800435 }
Zhang Chend25a7da2017-04-12 20:12:07 +0800436
437 return 1;
Zhang Chen0682e152016-09-27 10:22:30 +0800438}
439
440/*
441 * Look for old packets that the secondary hasn't matched,
442 * if we have some then we have to checkpoint to wake
443 * the secondary up.
444 */
445static void colo_old_packet_check(void *opaque)
446{
447 CompareState *s = opaque;
448
Zhang Chend25a7da2017-04-12 20:12:07 +0800449 /*
450 * If we find one old packet, stop finding job and notify
451 * COLO frame do checkpoint.
452 */
453 g_queue_find_custom(&s->conn_list, NULL,
454 (GCompareFunc)colo_old_packet_check_one_conn);
Zhang Chen0682e152016-09-27 10:22:30 +0800455}
456
457/*
458 * Called from the compare thread on the primary
459 * for compare connection
460 */
461static void colo_compare_connection(void *opaque, void *user_data)
462{
463 CompareState *s = user_data;
464 Connection *conn = opaque;
465 Packet *pkt = NULL;
466 GList *result = NULL;
467 int ret;
468
469 while (!g_queue_is_empty(&conn->primary_list) &&
470 !g_queue_is_empty(&conn->secondary_list)) {
Zhang Chen0682e152016-09-27 10:22:30 +0800471 pkt = g_queue_pop_tail(&conn->primary_list);
Zhang Chenf4b61832016-09-27 10:22:31 +0800472 switch (conn->ip_proto) {
473 case IPPROTO_TCP:
474 result = g_queue_find_custom(&conn->secondary_list,
475 pkt, (GCompareFunc)colo_packet_compare_tcp);
476 break;
477 case IPPROTO_UDP:
478 result = g_queue_find_custom(&conn->secondary_list,
479 pkt, (GCompareFunc)colo_packet_compare_udp);
480 break;
481 case IPPROTO_ICMP:
482 result = g_queue_find_custom(&conn->secondary_list,
483 pkt, (GCompareFunc)colo_packet_compare_icmp);
484 break;
485 default:
486 result = g_queue_find_custom(&conn->secondary_list,
487 pkt, (GCompareFunc)colo_packet_compare_other);
488 break;
489 }
Zhang Chen0682e152016-09-27 10:22:30 +0800490
491 if (result) {
Zhang Chenaa3a7032017-07-04 14:53:52 +0800492 ret = compare_chr_send(s,
493 pkt->data,
494 pkt->size,
495 pkt->vnet_hdr_len);
Zhang Chen0682e152016-09-27 10:22:30 +0800496 if (ret < 0) {
497 error_report("colo_send_primary_packet failed");
498 }
499 trace_colo_compare_main("packet same and release packet");
500 g_queue_remove(&conn->secondary_list, result->data);
501 packet_destroy(pkt, NULL);
502 } else {
503 /*
504 * If one packet arrive late, the secondary_list or
505 * primary_list will be empty, so we can't compare it
506 * until next comparison.
507 */
508 trace_colo_compare_main("packet different");
Zhang Chen0682e152016-09-27 10:22:30 +0800509 g_queue_push_tail(&conn->primary_list, pkt);
Zhang Chen0682e152016-09-27 10:22:30 +0800510 /* TODO: colo_notify_checkpoint();*/
511 break;
512 }
513 }
514}
515
Zhang Chen3037e7a2017-07-04 14:53:51 +0800516static int compare_chr_send(CompareState *s,
Zhang Chen59509ec2016-09-27 10:22:27 +0800517 const uint8_t *buf,
Zhang Chenaa3a7032017-07-04 14:53:52 +0800518 uint32_t size,
519 uint32_t vnet_hdr_len)
Zhang Chen59509ec2016-09-27 10:22:27 +0800520{
521 int ret = 0;
522 uint32_t len = htonl(size);
523
524 if (!size) {
525 return 0;
526 }
527
Zhang Chen3037e7a2017-07-04 14:53:51 +0800528 ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
Zhang Chen59509ec2016-09-27 10:22:27 +0800529 if (ret != sizeof(len)) {
530 goto err;
531 }
532
Zhang Chenaa3a7032017-07-04 14:53:52 +0800533 if (s->vnet_hdr) {
534 /*
535 * We send vnet header len make other module(like filter-redirector)
536 * know how to parse net packet correctly.
537 */
538 len = htonl(vnet_hdr_len);
539 ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
540 if (ret != sizeof(len)) {
541 goto err;
542 }
543 }
544
Zhang Chen3037e7a2017-07-04 14:53:51 +0800545 ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
Zhang Chen59509ec2016-09-27 10:22:27 +0800546 if (ret != size) {
547 goto err;
548 }
549
550 return 0;
551
552err:
553 return ret < 0 ? ret : -EIO;
554}
555
Zhang Chen0682e152016-09-27 10:22:30 +0800556static int compare_chr_can_read(void *opaque)
557{
558 return COMPARE_READ_LEN_MAX;
559}
560
561/*
562 * Called from the main thread on the primary for packets
563 * arriving over the socket from the primary.
564 */
565static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size)
566{
567 CompareState *s = COLO_COMPARE(opaque);
568 int ret;
569
570 ret = net_fill_rstate(&s->pri_rs, buf, size);
571 if (ret == -1) {
Anton Nefedov81517ba2017-07-06 15:08:49 +0300572 qemu_chr_fe_set_handlers(&s->chr_pri_in, NULL, NULL, NULL, NULL,
Marc-André Lureau39ab61c2016-10-22 12:53:03 +0300573 NULL, NULL, true);
Zhang Chen0682e152016-09-27 10:22:30 +0800574 error_report("colo-compare primary_in error");
575 }
576}
577
578/*
579 * Called from the main thread on the primary for packets
580 * arriving over the socket from the secondary.
581 */
582static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size)
583{
584 CompareState *s = COLO_COMPARE(opaque);
585 int ret;
586
587 ret = net_fill_rstate(&s->sec_rs, buf, size);
588 if (ret == -1) {
Anton Nefedov81517ba2017-07-06 15:08:49 +0300589 qemu_chr_fe_set_handlers(&s->chr_sec_in, NULL, NULL, NULL, NULL,
Marc-André Lureau39ab61c2016-10-22 12:53:03 +0300590 NULL, NULL, true);
Zhang Chen0682e152016-09-27 10:22:30 +0800591 error_report("colo-compare secondary_in error");
592 }
593}
594
zhanghailiang66d2a242017-02-17 10:53:11 +0800595/*
596 * Check old packet regularly so it can watch for any packets
597 * that the secondary hasn't produced equivalents of.
598 */
599static gboolean check_old_packet_regular(void *opaque)
600{
601 CompareState *s = opaque;
602
603 /* if have old packet we will notify checkpoint */
604 colo_old_packet_check(s);
605
606 return TRUE;
607}
608
Zhang Chen0682e152016-09-27 10:22:30 +0800609static void *colo_compare_thread(void *opaque)
610{
Zhang Chen0682e152016-09-27 10:22:30 +0800611 CompareState *s = opaque;
zhanghailiang66d2a242017-02-17 10:53:11 +0800612 GSource *timeout_source;
Zhang Chen0682e152016-09-27 10:22:30 +0800613
zhanghailiangb43decb2017-02-17 10:53:14 +0800614 s->worker_context = g_main_context_new();
Zhang Chen0682e152016-09-27 10:22:30 +0800615
Marc-André Lureau5345fdb2016-10-22 12:52:55 +0300616 qemu_chr_fe_set_handlers(&s->chr_pri_in, compare_chr_can_read,
Anton Nefedov81517ba2017-07-06 15:08:49 +0300617 compare_pri_chr_in, NULL, NULL,
618 s, s->worker_context, true);
Marc-André Lureau5345fdb2016-10-22 12:52:55 +0300619 qemu_chr_fe_set_handlers(&s->chr_sec_in, compare_chr_can_read,
Anton Nefedov81517ba2017-07-06 15:08:49 +0300620 compare_sec_chr_in, NULL, NULL,
621 s, s->worker_context, true);
Zhang Chen0682e152016-09-27 10:22:30 +0800622
zhanghailiangb43decb2017-02-17 10:53:14 +0800623 s->compare_loop = g_main_loop_new(s->worker_context, FALSE);
Zhang Chen0682e152016-09-27 10:22:30 +0800624
zhanghailiang66d2a242017-02-17 10:53:11 +0800625 /* To kick any packets that the secondary doesn't match */
626 timeout_source = g_timeout_source_new(REGULAR_PACKET_CHECK_MS);
627 g_source_set_callback(timeout_source,
628 (GSourceFunc)check_old_packet_regular, s, NULL);
zhanghailiangb43decb2017-02-17 10:53:14 +0800629 g_source_attach(timeout_source, s->worker_context);
zhanghailiang66d2a242017-02-17 10:53:11 +0800630
zhanghailiangdfd917a2017-02-17 10:53:12 +0800631 g_main_loop_run(s->compare_loop);
Zhang Chen0682e152016-09-27 10:22:30 +0800632
zhanghailiang66d2a242017-02-17 10:53:11 +0800633 g_source_unref(timeout_source);
zhanghailiangdfd917a2017-02-17 10:53:12 +0800634 g_main_loop_unref(s->compare_loop);
zhanghailiangb43decb2017-02-17 10:53:14 +0800635 g_main_context_unref(s->worker_context);
Zhang Chen0682e152016-09-27 10:22:30 +0800636 return NULL;
637}
638
Zhang Chen7dce4e62016-09-27 10:22:26 +0800639static char *compare_get_pri_indev(Object *obj, Error **errp)
640{
641 CompareState *s = COLO_COMPARE(obj);
642
643 return g_strdup(s->pri_indev);
644}
645
646static void compare_set_pri_indev(Object *obj, const char *value, Error **errp)
647{
648 CompareState *s = COLO_COMPARE(obj);
649
650 g_free(s->pri_indev);
651 s->pri_indev = g_strdup(value);
652}
653
654static char *compare_get_sec_indev(Object *obj, Error **errp)
655{
656 CompareState *s = COLO_COMPARE(obj);
657
658 return g_strdup(s->sec_indev);
659}
660
661static void compare_set_sec_indev(Object *obj, const char *value, Error **errp)
662{
663 CompareState *s = COLO_COMPARE(obj);
664
665 g_free(s->sec_indev);
666 s->sec_indev = g_strdup(value);
667}
668
669static char *compare_get_outdev(Object *obj, Error **errp)
670{
671 CompareState *s = COLO_COMPARE(obj);
672
673 return g_strdup(s->outdev);
674}
675
676static void compare_set_outdev(Object *obj, const char *value, Error **errp)
677{
678 CompareState *s = COLO_COMPARE(obj);
679
680 g_free(s->outdev);
681 s->outdev = g_strdup(value);
682}
683
Zhang Chenaa3a7032017-07-04 14:53:52 +0800684static bool compare_get_vnet_hdr(Object *obj, Error **errp)
685{
686 CompareState *s = COLO_COMPARE(obj);
687
688 return s->vnet_hdr;
689}
690
691static void compare_set_vnet_hdr(Object *obj,
692 bool value,
693 Error **errp)
694{
695 CompareState *s = COLO_COMPARE(obj);
696
697 s->vnet_hdr = value;
698}
699
Zhang Chen7dce4e62016-09-27 10:22:26 +0800700static void compare_pri_rs_finalize(SocketReadState *pri_rs)
701{
Zhang Chen59509ec2016-09-27 10:22:27 +0800702 CompareState *s = container_of(pri_rs, CompareState, pri_rs);
703
704 if (packet_enqueue(s, PRIMARY_IN)) {
705 trace_colo_compare_main("primary: unsupported packet in");
Zhang Chenaa3a7032017-07-04 14:53:52 +0800706 compare_chr_send(s,
707 pri_rs->buf,
708 pri_rs->packet_len,
709 pri_rs->vnet_hdr_len);
Zhang Chen0682e152016-09-27 10:22:30 +0800710 } else {
711 /* compare connection */
712 g_queue_foreach(&s->conn_list, colo_compare_connection, s);
Zhang Chen59509ec2016-09-27 10:22:27 +0800713 }
Zhang Chen7dce4e62016-09-27 10:22:26 +0800714}
715
716static void compare_sec_rs_finalize(SocketReadState *sec_rs)
717{
Zhang Chen59509ec2016-09-27 10:22:27 +0800718 CompareState *s = container_of(sec_rs, CompareState, sec_rs);
719
720 if (packet_enqueue(s, SECONDARY_IN)) {
721 trace_colo_compare_main("secondary: unsupported packet in");
Zhang Chen0682e152016-09-27 10:22:30 +0800722 } else {
723 /* compare connection */
724 g_queue_foreach(&s->conn_list, colo_compare_connection, s);
Zhang Chen59509ec2016-09-27 10:22:27 +0800725 }
Zhang Chen7dce4e62016-09-27 10:22:26 +0800726}
727
Zhang Chen7dce4e62016-09-27 10:22:26 +0800728
729/*
730 * Return 0 is success.
731 * Return 1 is failed.
732 */
Marc-André Lureau0ec7b3e2016-12-07 16:20:22 +0300733static int find_and_check_chardev(Chardev **chr,
Zhang Chen7dce4e62016-09-27 10:22:26 +0800734 char *chr_name,
735 Error **errp)
736{
Zhang Chen7dce4e62016-09-27 10:22:26 +0800737 *chr = qemu_chr_find(chr_name);
738 if (*chr == NULL) {
739 error_setg(errp, "Device '%s' not found",
740 chr_name);
741 return 1;
742 }
743
Daniel P. Berrange0a733362016-10-07 13:18:34 +0100744 if (!qemu_chr_has_feature(*chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) {
745 error_setg(errp, "chardev \"%s\" is not reconnectable",
Zhang Chen7dce4e62016-09-27 10:22:26 +0800746 chr_name);
747 return 1;
748 }
Marc-André Lureaufbf3cc32016-10-22 12:52:54 +0300749
Zhang Chen7dce4e62016-09-27 10:22:26 +0800750 return 0;
751}
752
753/*
754 * Called from the main thread on the primary
755 * to setup colo-compare.
756 */
757static void colo_compare_complete(UserCreatable *uc, Error **errp)
758{
759 CompareState *s = COLO_COMPARE(uc);
Marc-André Lureau0ec7b3e2016-12-07 16:20:22 +0300760 Chardev *chr;
Zhang Chen0682e152016-09-27 10:22:30 +0800761 char thread_name[64];
762 static int compare_id;
Zhang Chen7dce4e62016-09-27 10:22:26 +0800763
764 if (!s->pri_indev || !s->sec_indev || !s->outdev) {
765 error_setg(errp, "colo compare needs 'primary_in' ,"
766 "'secondary_in','outdev' property set");
767 return;
768 } else if (!strcmp(s->pri_indev, s->outdev) ||
769 !strcmp(s->sec_indev, s->outdev) ||
770 !strcmp(s->pri_indev, s->sec_indev)) {
771 error_setg(errp, "'indev' and 'outdev' could not be same "
772 "for compare module");
773 return;
774 }
775
Marc-André Lureau5345fdb2016-10-22 12:52:55 +0300776 if (find_and_check_chardev(&chr, s->pri_indev, errp) ||
777 !qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) {
Zhang Chen7dce4e62016-09-27 10:22:26 +0800778 return;
779 }
780
Marc-André Lureau5345fdb2016-10-22 12:52:55 +0300781 if (find_and_check_chardev(&chr, s->sec_indev, errp) ||
782 !qemu_chr_fe_init(&s->chr_sec_in, chr, errp)) {
Zhang Chen7dce4e62016-09-27 10:22:26 +0800783 return;
784 }
785
Marc-André Lureau5345fdb2016-10-22 12:52:55 +0300786 if (find_and_check_chardev(&chr, s->outdev, errp) ||
787 !qemu_chr_fe_init(&s->chr_out, chr, errp)) {
Zhang Chen7dce4e62016-09-27 10:22:26 +0800788 return;
789 }
790
Zhang Chenaa3a7032017-07-04 14:53:52 +0800791 net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
792 net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
Zhang Chen7dce4e62016-09-27 10:22:26 +0800793
Zhang Chenb6540d42016-09-27 10:22:29 +0800794 g_queue_init(&s->conn_list);
795
796 s->connection_track_table = g_hash_table_new_full(connection_key_hash,
797 connection_key_equal,
798 g_free,
799 connection_destroy);
Zhang Chen59509ec2016-09-27 10:22:27 +0800800
Zhang Chen0682e152016-09-27 10:22:30 +0800801 sprintf(thread_name, "colo-compare %d", compare_id);
802 qemu_thread_create(&s->thread, thread_name,
803 colo_compare_thread, s,
804 QEMU_THREAD_JOINABLE);
805 compare_id++;
806
Zhang Chen7dce4e62016-09-27 10:22:26 +0800807 return;
808}
809
zhanghailiangdfd917a2017-02-17 10:53:12 +0800810static void colo_flush_packets(void *opaque, void *user_data)
811{
812 CompareState *s = user_data;
813 Connection *conn = opaque;
814 Packet *pkt = NULL;
815
816 while (!g_queue_is_empty(&conn->primary_list)) {
817 pkt = g_queue_pop_head(&conn->primary_list);
Zhang Chenaa3a7032017-07-04 14:53:52 +0800818 compare_chr_send(s,
819 pkt->data,
820 pkt->size,
821 pkt->vnet_hdr_len);
zhanghailiangdfd917a2017-02-17 10:53:12 +0800822 packet_destroy(pkt, NULL);
823 }
824 while (!g_queue_is_empty(&conn->secondary_list)) {
825 pkt = g_queue_pop_head(&conn->secondary_list);
826 packet_destroy(pkt, NULL);
827 }
828}
829
Zhang Chen7dce4e62016-09-27 10:22:26 +0800830static void colo_compare_class_init(ObjectClass *oc, void *data)
831{
832 UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
833
834 ucc->complete = colo_compare_complete;
835}
836
837static void colo_compare_init(Object *obj)
838{
Zhang Chenaa3a7032017-07-04 14:53:52 +0800839 CompareState *s = COLO_COMPARE(obj);
840
Zhang Chen7dce4e62016-09-27 10:22:26 +0800841 object_property_add_str(obj, "primary_in",
842 compare_get_pri_indev, compare_set_pri_indev,
843 NULL);
844 object_property_add_str(obj, "secondary_in",
845 compare_get_sec_indev, compare_set_sec_indev,
846 NULL);
847 object_property_add_str(obj, "outdev",
848 compare_get_outdev, compare_set_outdev,
849 NULL);
Zhang Chenaa3a7032017-07-04 14:53:52 +0800850
851 s->vnet_hdr = false;
852 object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
853 compare_set_vnet_hdr, NULL);
Zhang Chen7dce4e62016-09-27 10:22:26 +0800854}
855
856static void colo_compare_finalize(Object *obj)
857{
858 CompareState *s = COLO_COMPARE(obj);
859
Marc-André Lureau1ce26102017-01-27 00:49:13 +0400860 qemu_chr_fe_deinit(&s->chr_pri_in, false);
861 qemu_chr_fe_deinit(&s->chr_sec_in, false);
862 qemu_chr_fe_deinit(&s->chr_out, false);
Zhang Chen7dce4e62016-09-27 10:22:26 +0800863
zhanghailiangdfd917a2017-02-17 10:53:12 +0800864 g_main_loop_quit(s->compare_loop);
865 qemu_thread_join(&s->thread);
866
867 /* Release all unhandled packets after compare thead exited */
868 g_queue_foreach(&s->conn_list, colo_flush_packets, s);
869
Zhang Chen727c2d72017-02-22 13:16:06 +0800870 g_queue_clear(&s->conn_list);
Zhang Chenb6540d42016-09-27 10:22:29 +0800871
zhanghailiangdfd917a2017-02-17 10:53:12 +0800872 g_hash_table_destroy(s->connection_track_table);
Zhang Chen7dce4e62016-09-27 10:22:26 +0800873 g_free(s->pri_indev);
874 g_free(s->sec_indev);
875 g_free(s->outdev);
876}
877
878static const TypeInfo colo_compare_info = {
879 .name = TYPE_COLO_COMPARE,
880 .parent = TYPE_OBJECT,
881 .instance_size = sizeof(CompareState),
882 .instance_init = colo_compare_init,
883 .instance_finalize = colo_compare_finalize,
884 .class_size = sizeof(CompareClass),
885 .class_init = colo_compare_class_init,
886 .interfaces = (InterfaceInfo[]) {
887 { TYPE_USER_CREATABLE },
888 { }
889 }
890};
891
892static void register_types(void)
893{
894 type_register_static(&colo_compare_info);
895}
896
897type_init(register_types);