#!/usr/bin/env python3

# This tool reads a disk image in any format and converts it to qcow2,
# writing the result directly to stdout.
#
# Copyright (C) 2024 Igalia, S.L.
#
# Authors: Alberto Garcia <berto@igalia.com>
#          Madeeha Javed <javed@igalia.com>
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
# qcow2 files produced by this script are always arranged like this:
#
# - qcow2 header
# - refcount table
# - refcount blocks
# - L1 table
# - L2 tables
# - Data clusters
#
# A note about variable names: in qcow2 there is one refcount table
# and one (active) L1 table, although each can occupy several
# clusters. For the sake of simplicity the code sometimes talks about
# refcount tables and L1 tables when referring to those clusters.
26
27import argparse
28import errno
29import math
30import os
31import signal
32import struct
33import subprocess
34import sys
35import tempfile
36import time
37from contextlib import contextmanager
38
# Defaults used when the corresponding command-line option is not given
QCOW2_DEFAULT_CLUSTER_SIZE = 64 * 1024
QCOW2_DEFAULT_REFCOUNT_BITS = 16

# Magic numbers identifying the header extensions written by this script
QCOW2_FEATURE_NAME_TABLE = 0x6803F857
QCOW2_DATA_FILE_NAME_STRING = 0x44415441

# Header length in QEMU 9.0. Must be a multiple of 8
QCOW2_V3_HEADER_LENGTH = 112

# Bit positions inside the incompatible / autoclear feature fields
QCOW2_INCOMPAT_DATA_FILE_BIT = 2
QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT = 1

# Flag stored in the top bit of the L1 and L2 entries written by this script
QCOW_OFLAG_COPIED = 1 << 63

# Name of the executable used to expose non-raw images as raw files
QEMU_STORAGE_DAEMON = "qemu-storage-daemon"
48
49
def bitmap_set(bitmap, idx):
    """Set bit number idx in bitmap, a mutable sequence of bytes.

    Bits are numbered from the least significant bit of each byte, so
    bit 0 is the lowest bit of bitmap[0] and bit 8 the lowest of bitmap[1].
    """
    byte_pos, bit_pos = divmod(idx, 8)
    bitmap[byte_pos] |= 1 << bit_pos
52
53
def bitmap_is_set(bitmap, idx):
    """Return True if bit number idx is set in bitmap, False otherwise.

    Bits are numbered from the least significant bit of each byte.
    """
    byte_pos, bit_pos = divmod(idx, 8)
    return bool((bitmap[byte_pos] >> bit_pos) & 1)
56
57
def bitmap_iterator(bitmap, length):
    """Yield, in increasing order, the indexes below length whose bit is set.

    Only the first length bits of bitmap are examined. Bits are numbered
    from the least significant bit of each byte.
    """
    position = 0
    while position < length:
        # Same bit test that bitmap_is_set() performs
        if bitmap[position // 8] & (1 << (position % 8)):
            yield position
        position += 1
62
63
def align_up(num, d):
    """Return num rounded up to the nearest multiple of d.

    The computation uses integer floor division only. Going through a
    float division (math.ceil(num / d)) loses precision once the quotient
    needs more than 53 bits, which silently produces a result that is
    smaller than num for very large values.
    """
    # -(-num // d) is the ceiling of num / d without leaving integers
    return d * -(-num // d)
66
67
# Holes in the input file contain only zeroes so we can skip them and
# save time. This function returns the indexes of the clusters that
# are known to contain data. Those are the ones that we need to read.
def clusters_with_data(fd, cluster_size):
    """Yield the indexes of the clusters of fd that overlap a data extent.

    The file is walked extent by extent with SEEK_DATA / SEEK_HOLE. The
    end of every extent is rounded up to a cluster boundary and the next
    search starts from there, so indexes are produced in increasing order.
    The generator finishes when lseek() fails with ENXIO; any other
    OSError is propagated to the caller.
    """
    search_from = 0
    while True:
        try:
            extent_start = os.lseek(fd, search_from, os.SEEK_DATA)
            extent_end = os.lseek(fd, extent_start, os.SEEK_HOLE)
        except OSError as err:
            if err.errno != errno.ENXIO:
                raise
            return  # End of file reached
        search_from = align_up(extent_end, cluster_size)
        first_cluster = extent_start // cluster_size
        end_cluster = search_from // cluster_size
        yield from range(first_cluster, end_cluster)
83
84
# write_qcow2_content() expects a raw input file. If we have a different
# format we can use qemu-storage-daemon to make it appear as raw.
@contextmanager
def get_input_as_raw_file(input_file, input_format):
    """Context manager yielding the path of a raw view of input_file.

    If input_format is "raw" the input file name itself is yielded and
    nothing else happens. Otherwise a qemu-storage-daemon is started that
    exports the image, opened with the input_format driver, as a FUSE
    mount on top of a temporary file, and the path of that file is
    yielded. On exit the daemon is terminated and the temporary file and
    directory are removed.

    Exits the program (SystemExit) with an error message if the storage
    daemon returns a non-zero exit status.
    """
    if input_format == "raw":
        yield input_file
        return

    def escape_option_value(value):
        # Options are separated by commas in the strings passed to
        # --blockdev and --export, so a literal comma inside a value has
        # to be doubled or the file name would be cut at that point.
        return value.replace(",", ",,")

    # These are set before entering the try block so the cleanup code
    # never refers to a name that has not been assigned.
    temp_dir = tempfile.mkdtemp()
    pid_file = os.path.join(temp_dir, "pid")
    raw_file = os.path.join(temp_dir, "raw")
    try:
        # The FUSE export needs an existing regular file as mount point
        open(raw_file, "wb").close()
        ret = subprocess.run(
            [
                QEMU_STORAGE_DAEMON,
                "--daemonize",
                "--pidfile", pid_file,
                "--blockdev",
                "driver=file,node-name=file0,"
                f"filename={escape_option_value(input_file)},read-only=on",
                "--blockdev",
                f"driver={input_format},node-name=disk0,file=file0,read-only=on",
                "--export",
                "type=fuse,id=export0,node-name=disk0,"
                f"mountpoint={escape_option_value(raw_file)},writable=off",
            ],
            capture_output=True,
        )
        if ret.returncode != 0:
            sys.exit("[Error] Could not start the qemu-storage-daemon:\n" +
                     ret.stderr.decode().rstrip('\n'))
        yield raw_file
    finally:
        # Kill the storage daemon on exit
        # and remove all temporary files
        if os.path.exists(pid_file):
            with open(pid_file, "r") as f:
                pid = int(f.readline())
            try:
                os.kill(pid, signal.SIGTERM)
            except ProcessLookupError:
                # The daemon is already gone, so nobody else is going to
                # remove its pid file. Do it here, otherwise the wait
                # loop below would never finish.
                try:
                    os.unlink(pid_file)
                except FileNotFoundError:
                    pass
            # The pid file disappearing is the signal that the daemon is gone
            while os.path.exists(pid_file):
                time.sleep(0.1)
        if os.path.exists(raw_file):
            os.unlink(raw_file)
        os.rmdir(temp_dir)
123
124
def write_features(cluster, offset, data_file_name):
    """Write the header extensions into cluster, starting at offset.

    cluster is a writable buffer (the first cluster of the image) and
    offset the position where the extensions begin. If data_file_name is
    not None an external data file name extension is written first: its
    magic number, the length of the UTF-8 encoded name and the name
    itself in a field whose size is a multiple of 8 bytes. The feature
    name table follows: magic number, table length and one 48-byte
    record (type, bit number, 46-byte name field) per known feature.
    """
    if data_file_name is not None:
        name_bytes = data_file_name.encode("utf-8")
        name_field_len = align_up(len(name_bytes), 8)
        struct.pack_into(
            f">II{name_field_len}s",
            cluster,
            offset,
            QCOW2_DATA_FILE_NAME_STRING,
            len(name_bytes),
            name_bytes,
        )
        offset += 8 + name_field_len

    incompatible, compatible, autoclear = 0, 1, 2
    feature_table = (
        (incompatible, 0, "dirty bit"),
        (incompatible, 1, "corrupt bit"),
        (incompatible, 2, "external data file"),
        (incompatible, 3, "compression type"),
        (incompatible, 4, "extended L2 entries"),
        (compatible, 0, "lazy refcounts"),
        (autoclear, 0, "bitmaps"),
        (autoclear, 1, "raw external data"),
    )
    record_size = 48

    # Extension header: magic number followed by the length of the table
    struct.pack_into(">I", cluster, offset, QCOW2_FEATURE_NAME_TABLE)
    struct.pack_into(">I", cluster, offset + 4, len(feature_table) * record_size)
    records_start = offset + 8

    for position, (kind, bit, label) in enumerate(feature_table):
        struct.pack_into(
            ">BB46s",
            cluster,
            records_start + position * record_size,
            kind,
            bit,
            label.encode("ascii"),
        )
155
156
def _ceil_div(num, den):
    """Return num / den rounded up, using integer arithmetic only."""
    return -(-num // den)


def _write_qcow2_image(fd, disk_size, cluster_size, refcount_bits, data_file_name, data_file_raw):
    """Compute the layout of the qcow2 image and write it to stdout.

    fd is a descriptor open for reading on the raw input file, or None
    when data_file_raw is set (the input is not read in that case).
    disk_size is the virtual disk size in bytes.
    """
    out = sys.stdout.buffer

    # Some basic values
    l1_entries_per_table = cluster_size // 8
    l2_entries_per_table = cluster_size // 8
    refcounts_per_table = cluster_size // 8
    refcounts_per_block = cluster_size * 8 // refcount_bits

    # Number of data clusters and L1 entries. Integer arithmetic keeps
    # these exact regardless of the size of the image.
    total_data_clusters = _ceil_div(disk_size, cluster_size)
    l1_entries = _ceil_div(total_data_clusters, l2_entries_per_table)
    allocated_l1_tables = _ceil_div(l1_entries, l1_entries_per_table)

    # Max L1 table size is 32 MB (QCOW_MAX_L1_SIZE in block/qcow2.h)
    if (l1_entries * 8) > (32 * 1024 * 1024):
        sys.exit("[Error] The image size is too large. Try using a larger cluster size.")

    # Two bitmaps indicating which L1 and L2 entries are set
    l1_bitmap = bytearray(allocated_l1_tables * l1_entries_per_table // 8)
    l2_bitmap = bytearray(l1_entries * l2_entries_per_table // 8)
    allocated_l2_tables = 0
    allocated_data_clusters = 0

    if data_file_raw:
        # If data_file_raw is set then all clusters are allocated and
        # we don't need to read the input file at all.
        allocated_l2_tables = l1_entries
        for idx in range(l1_entries):
            bitmap_set(l1_bitmap, idx)
        for idx in range(total_data_clusters):
            bitmap_set(l2_bitmap, idx)
    else:
        zero_cluster = bytes(cluster_size)
        # Read all the clusters that contain data
        for idx in clusters_with_data(fd, cluster_size):
            cluster = os.pread(fd, cluster_size, cluster_size * idx)
            # If the last cluster is smaller than cluster_size pad it with zeroes
            if len(cluster) < cluster_size:
                cluster += bytes(cluster_size - len(cluster))
            # If a cluster has non-zero data then it must be allocated
            # in the output file and its L2 entry must be set
            if cluster != zero_cluster:
                bitmap_set(l2_bitmap, idx)
                allocated_data_clusters += 1
                # Allocated data clusters also need their corresponding L1 entry and L2 table
                l1_idx = idx // l2_entries_per_table
                if not bitmap_is_set(l1_bitmap, l1_idx):
                    bitmap_set(l1_bitmap, l1_idx)
                    allocated_l2_tables += 1

    # Total amount of allocated clusters excluding the refcount blocks and table
    total_allocated_clusters = 1 + allocated_l1_tables + allocated_l2_tables
    if data_file_name is None:
        total_allocated_clusters += allocated_data_clusters

    # Clusters allocated for the refcount blocks and table
    allocated_refcount_blocks = _ceil_div(total_allocated_clusters, refcounts_per_block)
    allocated_refcount_tables = _ceil_div(allocated_refcount_blocks, refcounts_per_table)

    # Now we have a problem because allocated_refcount_blocks and allocated_refcount_tables...
    # (a) increase total_allocated_clusters, and
    # (b) need to be recalculated when total_allocated_clusters is increased
    # So we need to repeat the calculation as long as the numbers change
    while True:
        new_total_allocated_clusters = (
            total_allocated_clusters + allocated_refcount_tables + allocated_refcount_blocks
        )
        new_allocated_refcount_blocks = _ceil_div(new_total_allocated_clusters, refcounts_per_block)
        if new_allocated_refcount_blocks > allocated_refcount_blocks:
            allocated_refcount_blocks = new_allocated_refcount_blocks
            allocated_refcount_tables = _ceil_div(allocated_refcount_blocks, refcounts_per_table)
        else:
            break

    # Now that we have the final numbers we can update total_allocated_clusters
    total_allocated_clusters += allocated_refcount_tables + allocated_refcount_blocks

    # At this point we have the exact number of clusters that the output
    # image is going to use so we can calculate all the offsets.
    current_cluster_idx = 1

    refcount_table_offset = current_cluster_idx * cluster_size
    current_cluster_idx += allocated_refcount_tables

    refcount_block_offset = current_cluster_idx * cluster_size
    current_cluster_idx += allocated_refcount_blocks

    l1_table_offset = current_cluster_idx * cluster_size
    current_cluster_idx += allocated_l1_tables

    l2_table_offset = current_cluster_idx * cluster_size
    current_cluster_idx += allocated_l2_tables

    data_clusters_offset = current_cluster_idx * cluster_size

    # Calculate some values used in the qcow2 header
    if allocated_l1_tables == 0:
        l1_table_offset = 0

    # Both values are powers of two, so this is their exact base-2 logarithm
    hdr_cluster_bits = cluster_size.bit_length() - 1
    hdr_refcount_bits = refcount_bits.bit_length() - 1
    hdr_length = QCOW2_V3_HEADER_LENGTH
    hdr_incompat_features = 0
    if data_file_name is not None:
        hdr_incompat_features |= 1 << QCOW2_INCOMPAT_DATA_FILE_BIT
    hdr_autoclear_features = 0
    if data_file_raw:
        hdr_autoclear_features |= 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT

    ### Write qcow2 header
    cluster = bytearray(cluster_size)
    struct.pack_into(">4sIQIIQIIQQIIQQQQII", cluster, 0,
        b"QFI\xfb",            # QCOW magic string
        3,                     # version
        0,                     # backing file offset
        0,                     # backing file size
        hdr_cluster_bits,
        disk_size,
        0,                     # encryption method
        l1_entries,
        l1_table_offset,
        refcount_table_offset,
        allocated_refcount_tables,
        0,                     # number of snapshots
        0,                     # snapshot table offset
        hdr_incompat_features,
        0,                     # compatible features
        hdr_autoclear_features,
        hdr_refcount_bits,
        hdr_length,
    )

    write_features(cluster, hdr_length, data_file_name)

    out.write(cluster)

    ### Write refcount table
    cur_offset = refcount_block_offset
    remaining_refcount_table_entries = allocated_refcount_blocks  # Each entry is a pointer to a refcount block
    while remaining_refcount_table_entries > 0:
        cluster = bytearray(cluster_size)
        to_write = min(remaining_refcount_table_entries, refcounts_per_table)
        remaining_refcount_table_entries -= to_write
        for idx in range(to_write):
            struct.pack_into(">Q", cluster, idx * 8, cur_offset)
            cur_offset += cluster_size
        out.write(cluster)

    ### Write refcount blocks
    remaining_refcount_block_entries = total_allocated_clusters  # One entry for each allocated cluster
    for tbl in range(allocated_refcount_blocks):
        cluster = bytearray(cluster_size)
        to_write = min(remaining_refcount_block_entries, refcounts_per_block)
        remaining_refcount_block_entries -= to_write
        # All refcount entries contain the number 1. The only difference
        # is their bit width, defined when the image is created.
        for idx in range(to_write):
            if refcount_bits == 64:
                struct.pack_into(">Q", cluster, idx * 8, 1)
            elif refcount_bits == 32:
                struct.pack_into(">L", cluster, idx * 4, 1)
            elif refcount_bits == 16:
                struct.pack_into(">H", cluster, idx * 2, 1)
            elif refcount_bits == 8:
                cluster[idx] = 1
            elif refcount_bits == 4:
                cluster[idx // 2] |= 1 << ((idx % 2) * 4)
            elif refcount_bits == 2:
                cluster[idx // 4] |= 1 << ((idx % 4) * 2)
            elif refcount_bits == 1:
                cluster[idx // 8] |= 1 << (idx % 8)
        out.write(cluster)

    ### Write L1 table
    cur_offset = l2_table_offset
    for tbl in range(allocated_l1_tables):
        cluster = bytearray(cluster_size)
        for idx in range(l1_entries_per_table):
            l1_idx = tbl * l1_entries_per_table + idx
            if bitmap_is_set(l1_bitmap, l1_idx):
                struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED)
                cur_offset += cluster_size
        out.write(cluster)

    ### Write L2 tables
    cur_offset = data_clusters_offset
    for tbl in range(l1_entries):
        # Skip the empty L2 tables. We can identify them because
        # there is no L1 entry pointing at them.
        if bitmap_is_set(l1_bitmap, tbl):
            cluster = bytearray(cluster_size)
            for idx in range(l2_entries_per_table):
                l2_idx = tbl * l2_entries_per_table + idx
                if bitmap_is_set(l2_bitmap, l2_idx):
                    if data_file_name is None:
                        struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED)
                        cur_offset += cluster_size
                    else:
                        # With an external data file the entry holds the
                        # offset of the cluster inside that file
                        struct.pack_into(">Q", cluster, idx * 8,
                                         (l2_idx * cluster_size) | QCOW_OFLAG_COPIED)
            out.write(cluster)

    ### Write data clusters
    if data_file_name is None:
        for idx in bitmap_iterator(l2_bitmap, total_data_clusters):
            cluster = os.pread(fd, cluster_size, cluster_size * idx)
            # If the last cluster is smaller than cluster_size pad it with zeroes
            if len(cluster) < cluster_size:
                cluster += bytes(cluster_size - len(cluster))
            out.write(cluster)


def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, data_file_raw):
    """Convert the raw file input_file to qcow2 and write it to stdout.

    Parameters:
      input_file      path of the raw input image
      cluster_size    qcow2 cluster size in bytes (a power of two)
      refcount_bits   width of each refcount entry (a power of two, 1..64)
      data_file_name  name stored in the header as external data file, or
                      None to embed the data clusters in the output
      data_file_raw   if true, mark every cluster as allocated and set
                      the "raw external data" autoclear bit; the input
                      file is then not read at all

    Raises ValueError if data_file_raw is set without data_file_name, and
    exits the program (SystemExit) if the image needs an L1 table larger
    than 32 MB.
    """
    # This combination would need the data clusters in the output without
    # having an open input file to read them from. Reject it before
    # anything is written to stdout.
    if data_file_raw and data_file_name is None:
        raise ValueError("data_file_raw requires an external data file name")

    # Virtual disk size
    disk_size = align_up(os.path.getsize(input_file), 512)

    # Open the input file for reading, unless its contents are not needed
    fd = None if data_file_raw else os.open(input_file, os.O_RDONLY)
    try:
        _write_qcow2_image(fd, disk_size, cluster_size, refcount_bits,
                           data_file_name, data_file_raw)
    finally:
        # Close the descriptor even if the conversion is interrupted by
        # an error (for example, when the reader of stdout goes away)
        if fd is not None:
            os.close(fd)
369
370
def main():
    """Parse the command line, validate the options and run the conversion.

    Exits with an error message if the input is not a regular file, if
    an external data file is requested for a non-raw input or with a
    512-byte cluster size, or if stdout is a terminal.
    """
    valid_cluster_sizes = [2 ** exponent for exponent in range(9, 22)]
    valid_refcount_bits = [2 ** exponent for exponent in range(7)]

    # Command-line arguments
    parser = argparse.ArgumentParser(
        description=(
            "This program converts a QEMU disk image to qcow2 "
            "and writes it to the standard output"
        ),
    )
    parser.add_argument("input_file", help="name of the input file")
    parser.add_argument(
        "-f",
        dest="input_format",
        metavar="input_format",
        default="raw",
        help="format of the input file (default: raw)",
    )
    parser.add_argument(
        "-c",
        dest="cluster_size",
        metavar="cluster_size",
        type=int,
        choices=valid_cluster_sizes,
        default=QCOW2_DEFAULT_CLUSTER_SIZE,
        help=f"qcow2 cluster size (default: {QCOW2_DEFAULT_CLUSTER_SIZE})",
    )
    parser.add_argument(
        "-r",
        dest="refcount_bits",
        metavar="refcount_bits",
        type=int,
        choices=valid_refcount_bits,
        default=QCOW2_DEFAULT_REFCOUNT_BITS,
        help=f"width of the reference count entries (default: {QCOW2_DEFAULT_REFCOUNT_BITS})",
    )
    parser.add_argument(
        "-d",
        dest="data_file",
        action="store_true",
        help="create an image with input_file as an external data file",
    )
    parser.add_argument(
        "-R",
        dest="data_file_raw",
        action="store_true",
        help="enable data_file_raw on the generated image (implies -d)",
    )
    args = parser.parse_args()

    # -R implies -d
    use_data_file = args.data_file or args.data_file_raw

    if not os.path.isfile(args.input_file):
        sys.exit(f"[Error] {args.input_file} does not exist or is not a regular file.")

    if use_data_file:
        if args.input_format != "raw":
            sys.exit("[Error] External data files can only be used with raw input images")
        # A 512 byte header is too small for the data file name extension
        if args.cluster_size == 512:
            sys.exit("[Error] External data files require a larger cluster size")

    if sys.stdout.isatty():
        sys.exit("[Error] Refusing to write to a tty. Try redirecting stdout.")

    data_file_name = args.input_file if use_data_file else None

    with get_input_as_raw_file(args.input_file, args.input_format) as raw_file:
        write_qcow2_content(
            raw_file,
            args.cluster_size,
            args.refcount_bits,
            data_file_name,
            args.data_file_raw,
        )


if __name__ == "__main__":
    main()