/*\
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
 *  Network Block Device
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
\*/
| 19 | |
| 20 | #include "nbd.h" |
| 21 | |
| 22 | #include <errno.h> |
| 23 | #include <string.h> |
| 24 | #include <sys/ioctl.h> |
| 25 | #include <ctype.h> |
| 26 | #include <inttypes.h> |
| 27 | #include <sys/socket.h> |
| 28 | #include <netinet/in.h> |
| 29 | #include <netinet/tcp.h> |
| 30 | #include <arpa/inet.h> |
| 31 | #include <netdb.h> |
| 32 | |
/* Non-zero enables TRACE() output; defined in another translation unit. */
extern int verbose;

/*
 * LOG(fmt, ...): unconditionally print one diagnostic line to stderr,
 * prefixed with the source file, the enclosing function and the line number.
 * 'fmt' must be a string literal: it is concatenated with the prefix and the
 * trailing newline at compile time.
 */
#define LOG(fmt, ...)                                                   \
    do {                                                                \
        fprintf(stderr, "%s:%s():L%d: " fmt "\n",                       \
                __FILE__, __func__, __LINE__, ## __VA_ARGS__);          \
    } while (0)

/* TRACE(fmt, ...): same as LOG(), but only when 'verbose' is non-zero. */
#define TRACE(fmt, ...)                                                 \
    do {                                                                \
        if (verbose) {                                                  \
            LOG(fmt, ## __VA_ARGS__);                                   \
        }                                                               \
    } while (0)
| 43 | |
/* This is all part of the "official" NBD API */

/* Leading 32-bit magic of every request header and every reply header. */
#define NBD_REQUEST_MAGIC       0x25609513
#define NBD_REPLY_MAGIC         0x67446698

/* All NBD ioctl request codes share ioctl type 0xab and carry no payload size. */
#define NBD_IOCTL(nr)           _IO(0xab, nr)

#define NBD_SET_SOCK            NBD_IOCTL(0)
#define NBD_SET_BLKSIZE         NBD_IOCTL(1)
#define NBD_SET_SIZE            NBD_IOCTL(2)
#define NBD_DO_IT               NBD_IOCTL(3)
#define NBD_CLEAR_SOCK          NBD_IOCTL(4)
#define NBD_CLEAR_QUE           NBD_IOCTL(5)
#define NBD_PRINT_DEBUG         NBD_IOCTL(6)
#define NBD_SET_SIZE_BLOCKS     NBD_IOCTL(7)
#define NBD_DISCONNECT          NBD_IOCTL(8)

/* That's all folks */
| 60 | |
/*
 * read_sync() / write_sync(): transfer exactly 'size' bytes between 'fd' and
 * 'buffer'.  Both are thin wrappers around wr_sync(); see there for the
 * return value.
 */
#define read_sync(fd, buffer, size) wr_sync(fd, buffer, size, true)
#define write_sync(fd, buffer, size) wr_sync(fd, buffer, size, false)

/*
 * Repeatedly read() from or write() to 'fd' until 'size' bytes have been
 * transferred.
 *
 * fd       file descriptor to operate on
 * buffer   destination (do_read) or source (!do_read) of the data
 * size     number of bytes to transfer
 * do_read  true to read(), false to write()
 *
 * Returns the number of bytes transferred:
 *   - 'size' on complete success;
 *   - less than 'size' if read()/write() returned 0 (end of file) first;
 *   - 0 on an unrecoverable error, whatever was transferred before; errno
 *     is left as set by the failing call.
 * Callers are expected to compare the result against 'size'.
 */
static size_t wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
    /* byte-typed view: arithmetic on 'void *' is a compiler extension */
    char *bytes = buffer;
    size_t offset = 0;

    while (offset < size) {
        ssize_t len;

        if (do_read) {
            len = read(fd, bytes + offset, size - offset);
        } else {
            len = write(fd, bytes + offset, size - offset);
        }

        if (len == -1) {
            /*
             * recoverable error: the descriptor was not ready (EAGAIN) or
             * the call was interrupted by a signal before moving any data
             * (EINTR); nothing was transferred, so simply retry
             */
            if (errno == EAGAIN || errno == EINTR) {
                continue;
            }

            /* unrecoverable error */
            return 0;
        }

        /* eof */
        if (len == 0) {
            break;
        }

        offset += (size_t)len;
    }

    return offset;
}
| 97 | |
/*
 * Open a TCP/IPv4 connection to 'address':'port'.
 *
 * address  dotted-quad IPv4 address, or a host name that is resolved with
 *          gethostbyname() when it does not parse as a dotted quad
 * port     TCP port in host byte order
 *
 * Returns the connected socket descriptor, or -1 with errno set.  A failed
 * or non-IPv4 host name lookup is reported as EINVAL.
 */
static int tcp_socket_outgoing(const char *address, uint16_t port)
{
    int s;
    struct in_addr in;
    struct sockaddr_in addr;
    int serrno;

    s = socket(PF_INET, SOCK_STREAM, 0);
    if (s == -1) {
        return -1;
    }

    if (inet_aton(address, &in) == 0) {
        struct hostent *ent = gethostbyname(address);

        /*
         * gethostbyname() reports failure through h_errno and leaves errno
         * unspecified, so give the caller a defined errno.  Also refuse
         * anything that is not a plain IPv4 address before copying it.
         */
        if (ent == NULL || ent->h_addrtype != AF_INET ||
            ent->h_length != (int)sizeof(in) || ent->h_addr_list[0] == NULL) {
            errno = EINVAL;
            goto error;
        }

        memcpy(&in, ent->h_addr_list[0], sizeof(in));
    }

    /* zero the padding (sin_zero) as well, not just the named fields */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(port);
    memcpy(&addr.sin_addr.s_addr, &in, sizeof(in));

    if (connect(s, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
        goto error;
    }

    return s;

error:
    /* close() may clobber errno; preserve the original failure cause */
    serrno = errno;
    close(s);
    errno = serrno;
    return -1;
}
| 136 | |
/*
 * Create a listening TCP/IPv4 socket bound to 'address':'port'.
 *
 * address  local dotted-quad IPv4 address, or a host name that is resolved
 *          with gethostbyname() when it does not parse as a dotted quad
 * port     TCP port in host byte order
 *
 * SO_REUSEADDR is set before binding and the listen backlog is 128.
 *
 * Returns the listening socket descriptor, or -1 with errno set.  A failed
 * or non-IPv4 host name lookup is reported as EINVAL.
 */
int tcp_socket_incoming(const char *address, uint16_t port)
{
    int s;
    struct in_addr in;
    struct sockaddr_in addr;
    int serrno;
    int opt;

    s = socket(PF_INET, SOCK_STREAM, 0);
    if (s == -1) {
        return -1;
    }

    if (inet_aton(address, &in) == 0) {
        struct hostent *ent = gethostbyname(address);

        /*
         * gethostbyname() reports failure through h_errno and leaves errno
         * unspecified, so give the caller a defined errno.  Also refuse
         * anything that is not a plain IPv4 address before copying it.
         */
        if (ent == NULL || ent->h_addrtype != AF_INET ||
            ent->h_length != (int)sizeof(in) || ent->h_addr_list[0] == NULL) {
            errno = EINVAL;
            goto error;
        }

        memcpy(&in, ent->h_addr_list[0], sizeof(in));
    }

    /* zero the padding (sin_zero) as well, not just the named fields */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(port);
    memcpy(&addr.sin_addr.s_addr, &in, sizeof(in));

    opt = 1;
    if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) == -1) {
        goto error;
    }

    if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
        goto error;
    }

    if (listen(s, 128) == -1) {
        goto error;
    }

    return s;

error:
    /* close() may clobber errno; preserve the original failure cause */
    serrno = errno;
    close(s);
    errno = serrno;
    return -1;
}
| 185 | |
/* Basic flow

   Server         Client

   Negotiate
                  Request
   Response
                  Request
   Response
                  ...
   ...
                  Request (type == 2)
*/
| 199 | |
| 200 | int nbd_negotiate(BlockDriverState *bs, int csock, off_t size) |
| 201 | { |
| 202 | char buf[8 + 8 + 8 + 128]; |
| 203 | |
| 204 | /* Negotiate |
| 205 | [ 0 .. 7] passwd ("NBDMAGIC") |
| 206 | [ 8 .. 15] magic (0x00420281861253) |
| 207 | [16 .. 23] size |
| 208 | [24 .. 151] reserved (0) |
| 209 | */ |
| 210 | |
| 211 | TRACE("Beginning negotiation."); |
| 212 | memcpy(buf, "NBDMAGIC", 8); |
| 213 | cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL); |
| 214 | cpu_to_be64w((uint64_t*)(buf + 16), size); |
| 215 | memset(buf + 24, 0, 128); |
| 216 | |
| 217 | if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { |
| 218 | LOG("write failed"); |
| 219 | errno = EINVAL; |
| 220 | return -1; |
| 221 | } |
| 222 | |
| 223 | TRACE("Negotation succeeded."); |
| 224 | |
| 225 | return 0; |
| 226 | } |
| 227 | |
| 228 | int nbd_receive_negotiate(int fd, int csock) |
| 229 | { |
| 230 | char buf[8 + 8 + 8 + 128]; |
| 231 | uint64_t magic; |
| 232 | off_t size; |
| 233 | size_t blocksize; |
| 234 | |
| 235 | TRACE("Receiving negotation."); |
| 236 | |
| 237 | if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { |
| 238 | LOG("read failed"); |
| 239 | errno = EINVAL; |
| 240 | return -1; |
| 241 | } |
| 242 | |
| 243 | magic = be64_to_cpup((uint64_t*)(buf + 8)); |
| 244 | size = be64_to_cpup((uint64_t*)(buf + 16)); |
| 245 | blocksize = 1024; |
| 246 | |
| 247 | TRACE("Magic is %c%c%c%c%c%c%c%c", |
| 248 | isprint(buf[0]) ? buf[0] : '.', |
| 249 | isprint(buf[1]) ? buf[1] : '.', |
| 250 | isprint(buf[2]) ? buf[2] : '.', |
| 251 | isprint(buf[3]) ? buf[3] : '.', |
| 252 | isprint(buf[4]) ? buf[4] : '.', |
| 253 | isprint(buf[5]) ? buf[5] : '.', |
| 254 | isprint(buf[6]) ? buf[6] : '.', |
| 255 | isprint(buf[7]) ? buf[7] : '.'); |
| 256 | TRACE("Magic is 0x%" PRIx64, magic); |
| 257 | TRACE("Size is %" PRIu64, size); |
| 258 | |
| 259 | if (memcmp(buf, "NBDMAGIC", 8) != 0) { |
| 260 | LOG("Invalid magic received"); |
| 261 | errno = EINVAL; |
| 262 | return -1; |
| 263 | } |
| 264 | |
| 265 | TRACE("Checking magic"); |
| 266 | |
| 267 | if (magic != 0x00420281861253LL) { |
| 268 | LOG("Bad magic received"); |
| 269 | errno = EINVAL; |
| 270 | return -1; |
| 271 | } |
| 272 | |
| 273 | TRACE("Setting block size to %lu", (unsigned long)blocksize); |
| 274 | |
| 275 | if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) == -1) { |
| 276 | int serrno = errno; |
| 277 | LOG("Failed setting NBD block size"); |
| 278 | errno = serrno; |
| 279 | return -1; |
| 280 | } |
| 281 | |
| 282 | TRACE("Setting size to %llu block(s)", |
| 283 | (unsigned long long)(size / blocksize)); |
| 284 | |
| 285 | if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) == -1) { |
| 286 | int serrno = errno; |
| 287 | LOG("Failed setting size (in blocks)"); |
| 288 | errno = serrno; |
| 289 | return -1; |
| 290 | } |
| 291 | |
| 292 | TRACE("Clearing NBD socket"); |
| 293 | |
| 294 | if (ioctl(fd, NBD_CLEAR_SOCK) == -1) { |
| 295 | int serrno = errno; |
| 296 | LOG("Failed clearing NBD socket"); |
| 297 | errno = serrno; |
| 298 | return -1; |
| 299 | } |
| 300 | |
| 301 | TRACE("Setting NBD socket"); |
| 302 | |
| 303 | if (ioctl(fd, NBD_SET_SOCK, csock) == -1) { |
| 304 | int serrno = errno; |
| 305 | LOG("Failed to set NBD socket"); |
| 306 | errno = serrno; |
| 307 | return -1; |
| 308 | } |
| 309 | |
| 310 | TRACE("Negotiation ended"); |
| 311 | |
| 312 | return 0; |
| 313 | } |
| 314 | |
| 315 | int nbd_disconnect(int fd) |
| 316 | { |
| 317 | ioctl(fd, NBD_CLEAR_QUE); |
| 318 | ioctl(fd, NBD_DISCONNECT); |
| 319 | ioctl(fd, NBD_CLEAR_SOCK); |
| 320 | return 0; |
| 321 | } |
| 322 | |
| 323 | int nbd_client(int fd, int csock) |
| 324 | { |
| 325 | int ret; |
| 326 | int serrno; |
| 327 | |
| 328 | TRACE("Doing NBD loop"); |
| 329 | |
| 330 | ret = ioctl(fd, NBD_DO_IT); |
| 331 | serrno = errno; |
| 332 | |
| 333 | TRACE("NBD loop returned %d: %s", ret, strerror(serrno)); |
| 334 | |
| 335 | TRACE("Clearing NBD queue"); |
| 336 | ioctl(fd, NBD_CLEAR_QUE); |
| 337 | |
| 338 | TRACE("Clearing NBD socket"); |
| 339 | ioctl(fd, NBD_CLEAR_SOCK); |
| 340 | |
| 341 | errno = serrno; |
| 342 | return ret; |
| 343 | } |
| 344 | |
| 345 | int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset, off_t *offset, bool readonly) |
| 346 | { |
| 347 | #ifndef _REENTRANT |
| 348 | static uint8_t data[1024 * 1024]; // keep this off of the stack |
| 349 | #else |
| 350 | uint8_t data[1024 * 1024]; |
| 351 | #endif |
| 352 | uint8_t buf[4 + 4 + 8 + 8 + 4]; |
| 353 | uint32_t magic; |
| 354 | uint32_t type; |
| 355 | uint64_t from; |
| 356 | uint32_t len; |
| 357 | |
| 358 | TRACE("Reading request."); |
| 359 | |
| 360 | if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { |
| 361 | LOG("read failed"); |
| 362 | errno = EINVAL; |
| 363 | return -1; |
| 364 | } |
| 365 | |
| 366 | /* Request |
| 367 | [ 0 .. 3] magic (NBD_REQUEST_MAGIC) |
| 368 | [ 4 .. 7] type (0 == READ, 1 == WRITE) |
| 369 | [ 8 .. 15] handle |
| 370 | [16 .. 23] from |
| 371 | [24 .. 27] len |
| 372 | */ |
| 373 | |
| 374 | magic = be32_to_cpup((uint32_t*)buf); |
| 375 | type = be32_to_cpup((uint32_t*)(buf + 4)); |
| 376 | from = be64_to_cpup((uint64_t*)(buf + 16)); |
| 377 | len = be32_to_cpup((uint32_t*)(buf + 24)); |
| 378 | |
| 379 | TRACE("Got request: " |
| 380 | "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }", |
| 381 | magic, type, from, len); |
| 382 | |
| 383 | |
| 384 | if (magic != NBD_REQUEST_MAGIC) { |
| 385 | LOG("invalid magic (got 0x%x)", magic); |
| 386 | errno = EINVAL; |
| 387 | return -1; |
| 388 | } |
| 389 | |
| 390 | if (len > sizeof(data)) { |
| 391 | LOG("len (%u) is larger than max len (%lu)", |
| 392 | len, sizeof(data)); |
| 393 | errno = EINVAL; |
| 394 | return -1; |
| 395 | } |
| 396 | |
| 397 | if ((from + len) < from) { |
| 398 | LOG("integer overflow detected! " |
| 399 | "you're probably being attacked"); |
| 400 | errno = EINVAL; |
| 401 | return -1; |
| 402 | } |
| 403 | |
| 404 | if ((from + len) > size) { |
| 405 | LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64 |
| 406 | ", Offset: %" PRIu64 "\n", |
| 407 | from, len, size, dev_offset); |
| 408 | LOG("requested operation past EOF--bad client?"); |
| 409 | errno = EINVAL; |
| 410 | return -1; |
| 411 | } |
| 412 | |
| 413 | /* Reply |
| 414 | [ 0 .. 3] magic (NBD_REPLY_MAGIC) |
| 415 | [ 4 .. 7] error (0 == no error) |
| 416 | [ 7 .. 15] handle |
| 417 | */ |
| 418 | cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC); |
| 419 | cpu_to_be32w((uint32_t*)(buf + 4), 0); |
| 420 | |
| 421 | TRACE("Decoding type"); |
| 422 | |
| 423 | switch (type) { |
| 424 | case 0: |
| 425 | TRACE("Request type is READ"); |
| 426 | |
| 427 | if (bdrv_read(bs, (from + dev_offset) / 512, data, len / 512) == -1) { |
| 428 | LOG("reading from file failed"); |
| 429 | errno = EINVAL; |
| 430 | return -1; |
| 431 | } |
| 432 | *offset += len; |
| 433 | |
| 434 | TRACE("Read %u byte(s)", len); |
| 435 | |
| 436 | TRACE("Sending OK response"); |
| 437 | |
| 438 | if (write_sync(csock, buf, 16) != 16) { |
| 439 | LOG("writing to socket failed"); |
| 440 | errno = EINVAL; |
| 441 | return -1; |
| 442 | } |
| 443 | |
| 444 | TRACE("Sending data to client"); |
| 445 | |
| 446 | if (write_sync(csock, data, len) != len) { |
| 447 | LOG("writing to socket failed"); |
| 448 | errno = EINVAL; |
| 449 | return -1; |
| 450 | } |
| 451 | break; |
| 452 | case 1: |
| 453 | TRACE("Request type is WRITE"); |
| 454 | |
| 455 | TRACE("Reading %u byte(s)", len); |
| 456 | |
| 457 | if (read_sync(csock, data, len) != len) { |
| 458 | LOG("reading from socket failed"); |
| 459 | errno = EINVAL; |
| 460 | return -1; |
| 461 | } |
| 462 | |
| 463 | if (readonly) { |
| 464 | TRACE("Server is read-only, return error"); |
| 465 | |
| 466 | cpu_to_be32w((uint32_t*)(buf + 4), 1); |
| 467 | } else { |
| 468 | TRACE("Writing to device"); |
| 469 | |
| 470 | if (bdrv_write(bs, (from + dev_offset) / 512, data, len / 512) == -1) { |
| 471 | LOG("writing to file failed"); |
| 472 | errno = EINVAL; |
| 473 | return -1; |
| 474 | } |
| 475 | |
| 476 | *offset += len; |
| 477 | } |
| 478 | |
| 479 | TRACE("Sending response to client"); |
| 480 | |
| 481 | if (write_sync(csock, buf, 16) != 16) { |
| 482 | LOG("writing to socket failed"); |
| 483 | errno = EINVAL; |
| 484 | return -1; |
| 485 | } |
| 486 | break; |
| 487 | case 2: |
| 488 | TRACE("Request type is DISCONNECT"); |
| 489 | errno = 0; |
| 490 | return 1; |
| 491 | default: |
| 492 | LOG("invalid request type (%u) received", type); |
| 493 | errno = EINVAL; |
| 494 | return -1; |
| 495 | } |
| 496 | |
| 497 | TRACE("Request/Reply complete"); |
| 498 | |
| 499 | return 0; |
| 500 | } |