Blame - scripts/qcow2-to-stdout.py - people/peter.maydell/qemu-arm.git

blob: 06b7c13ccbb547a9eb6e13817971ae8f2f47544d [file] [log] [blame]

Alberto Garcia	df95711	2024-07-30 16:15:52 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2
				3	# This tool reads a disk image in any format and converts it to qcow2,
				4	# writing the result directly to stdout.
				5	#
				6	# Copyright (C) 2024 Igalia, S.L.
				7	#
				8	# Authors: Alberto Garcia <berto@igalia.com>
				9	# Madeeha Javed <javed@igalia.com>
				10	#
				11	# SPDX-License-Identifier: GPL-2.0-or-later
				12	#
				13	# qcow2 files produced by this script are always arranged like this:
				14	#
				15	# - qcow2 header
				16	# - refcount table
				17	# - refcount blocks
				18	# - L1 table
				19	# - L2 tables
				20	# - Data clusters
				21	#
				22	# A note about variable names: in qcow2 there is one refcount table
				23	# and one (active) L1 table, although each can occupy several
				24	# clusters. For the sake of simplicity the code sometimes talks about
				25	# refcount tables and L1 tables when referring to those clusters.
				26
				27	import argparse
				28	import errno
				29	import math
				30	import os
				31	import signal
				32	import struct
				33	import subprocess
				34	import sys
				35	import tempfile
				36	import time
				37	from contextlib import contextmanager
				38
# Defaults applied by the command-line interface when -c / -r are not given
QCOW2_DEFAULT_CLUSTER_SIZE = 65536
QCOW2_DEFAULT_REFCOUNT_BITS = 16
# Header extension type identifiers (each is written as a big-endian
# 32-bit value at the start of its extension)
QCOW2_FEATURE_NAME_TABLE = 0x6803F857
QCOW2_DATA_FILE_NAME_STRING = 0x44415441
QCOW2_V3_HEADER_LENGTH = 112 # Header length in QEMU 9.0. Must be a multiple of 8
# Bit positions (not masks) inside the incompatible / autoclear feature
# fields of the header; they are used as "1 << BIT"
QCOW2_INCOMPAT_DATA_FILE_BIT = 2
QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT = 1
# Flag OR-ed into every L1 and L2 entry written by this script
QCOW_OFLAG_COPIED = 1 << 63
# Executable used to expose non-raw input images as a raw file
QEMU_STORAGE_DAEMON = "qemu-storage-daemon"
				48
				49
				50	def bitmap_set(bitmap, idx):
				51	bitmap[idx // 8] \|= 1 << (idx % 8)
				52
				53
				54	def bitmap_is_set(bitmap, idx):
				55	return (bitmap[idx // 8] & (1 << (idx % 8))) != 0
				56
				57
				58	def bitmap_iterator(bitmap, length):
				59	for idx in range(length):
				60	if bitmap_is_set(bitmap, idx):
				61	yield idx
				62
				63
				64	def align_up(num, d):
				65	return d * math.ceil(num / d)
				66
				67
				68	# Holes in the input file contain only zeroes so we can skip them and
				69	# save time. This function returns the indexes of the clusters that
				70	# are known to contain data. Those are the ones that we need to read.
				71	def clusters_with_data(fd, cluster_size):
				72	data_to = 0
				73	while True:
				74	try:
				75	data_from = os.lseek(fd, data_to, os.SEEK_DATA)
				76	data_to = align_up(os.lseek(fd, data_from, os.SEEK_HOLE), cluster_size)
				77	for idx in range(data_from // cluster_size, data_to // cluster_size):
				78	yield idx
				79	except OSError as err:
				80	if err.errno == errno.ENXIO: # End of file reached
				81	break
				82	raise err
				83
				84
				85	# write_qcow2_content() expects a raw input file. If we have a different
				86	# format we can use qemu-storage-daemon to make it appear as raw.
				87	@contextmanager
				88	def get_input_as_raw_file(input_file, input_format):
				89	if input_format == "raw":
				90	yield input_file
				91	return
				92	try:
				93	temp_dir = tempfile.mkdtemp()
				94	pid_file = os.path.join(temp_dir, "pid")
				95	raw_file = os.path.join(temp_dir, "raw")
				96	open(raw_file, "wb").close()
				97	ret = subprocess.run(
				98	[
				99	QEMU_STORAGE_DAEMON,
				100	"--daemonize",
				101	"--pidfile", pid_file,
				102	"--blockdev", f"driver=file,node-name=file0,driver=file,filename={input_file},read-only=on",
				103	"--blockdev", f"driver={input_format},node-name=disk0,file=file0,read-only=on",
				104	"--export", f"type=fuse,id=export0,node-name=disk0,mountpoint={raw_file},writable=off",
				105	],
				106	capture_output=True,
				107	)
				108	if ret.returncode != 0:
				109	sys.exit("[Error] Could not start the qemu-storage-daemon:\n" +
				110	ret.stderr.decode().rstrip('\n'))
				111	yield raw_file
				112	finally:
				113	# Kill the storage daemon on exit
				114	# and remove all temporary files
				115	if os.path.exists(pid_file):
				116	with open(pid_file, "r") as f:
				117	pid = int(f.readline())
				118	os.kill(pid, signal.SIGTERM)
				119	while os.path.exists(pid_file):
				120	time.sleep(0.1)
				121	os.unlink(raw_file)
				122	os.rmdir(temp_dir)
				123
				124
				125	def write_features(cluster, offset, data_file_name):
				126	if data_file_name is not None:
				127	encoded_name = data_file_name.encode("utf-8")
				128	padded_name_len = align_up(len(encoded_name), 8)
				129	struct.pack_into(f">II{padded_name_len}s", cluster, offset,
				130	QCOW2_DATA_FILE_NAME_STRING,
				131	len(encoded_name),
				132	encoded_name)
				133	offset += 8 + padded_name_len
				134
				135	qcow2_features = [
				136	# Incompatible
				137	(0, 0, "dirty bit"),
				138	(0, 1, "corrupt bit"),
				139	(0, 2, "external data file"),
				140	(0, 3, "compression type"),
				141	(0, 4, "extended L2 entries"),
				142	# Compatible
				143	(1, 0, "lazy refcounts"),
				144	# Autoclear
				145	(2, 0, "bitmaps"),
				146	(2, 1, "raw external data"),
				147	]
				148	struct.pack_into(">I", cluster, offset, QCOW2_FEATURE_NAME_TABLE)
				149	struct.pack_into(">I", cluster, offset + 4, len(qcow2_features) * 48)
				150	offset += 8
				151	for feature_type, feature_bit, feature_name in qcow2_features:
				152	struct.pack_into(">BB46s", cluster, offset,
				153	feature_type, feature_bit, feature_name.encode("ascii"))
				154	offset += 48
				155
				156
				157	def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, data_file_raw):
				158	# Some basic values
				159	l1_entries_per_table = cluster_size // 8
				160	l2_entries_per_table = cluster_size // 8
				161	refcounts_per_table = cluster_size // 8
				162	refcounts_per_block = cluster_size * 8 // refcount_bits
				163
				164	# Virtual disk size, number of data clusters and L1 entries
				165	disk_size = align_up(os.path.getsize(input_file), 512)
				166	total_data_clusters = math.ceil(disk_size / cluster_size)
				167	l1_entries = math.ceil(total_data_clusters / l2_entries_per_table)
				168	allocated_l1_tables = math.ceil(l1_entries / l1_entries_per_table)
				169
				170	# Max L1 table size is 32 MB (QCOW_MAX_L1_SIZE in block/qcow2.h)
				171	if (l1_entries * 8) > (32 * 1024 * 1024):
				172	sys.exit("[Error] The image size is too large. Try using a larger cluster size.")
				173
				174	# Two bitmaps indicating which L1 and L2 entries are set
				175	l1_bitmap = bytearray(allocated_l1_tables * l1_entries_per_table // 8)
				176	l2_bitmap = bytearray(l1_entries * l2_entries_per_table // 8)
				177	allocated_l2_tables = 0
				178	allocated_data_clusters = 0
				179
				180	if data_file_raw:
				181	# If data_file_raw is set then all clusters are allocated and
				182	# we don't need to read the input file at all.
				183	allocated_l2_tables = l1_entries
				184	for idx in range(l1_entries):
				185	bitmap_set(l1_bitmap, idx)
				186	for idx in range(total_data_clusters):
				187	bitmap_set(l2_bitmap, idx)
				188	else:
				189	# Open the input file for reading
				190	fd = os.open(input_file, os.O_RDONLY)
				191	zero_cluster = bytes(cluster_size)
				192	# Read all the clusters that contain data
				193	for idx in clusters_with_data(fd, cluster_size):
				194	cluster = os.pread(fd, cluster_size, cluster_size * idx)
				195	# If the last cluster is smaller than cluster_size pad it with zeroes
				196	if len(cluster) < cluster_size:
				197	cluster += bytes(cluster_size - len(cluster))
				198	# If a cluster has non-zero data then it must be allocated
				199	# in the output file and its L2 entry must be set
				200	if cluster != zero_cluster:
				201	bitmap_set(l2_bitmap, idx)
				202	allocated_data_clusters += 1
				203	# Allocated data clusters also need their corresponding L1 entry and L2 table
				204	l1_idx = math.floor(idx / l2_entries_per_table)
				205	if not bitmap_is_set(l1_bitmap, l1_idx):
				206	bitmap_set(l1_bitmap, l1_idx)
				207	allocated_l2_tables += 1
				208
				209	# Total amount of allocated clusters excluding the refcount blocks and table
				210	total_allocated_clusters = 1 + allocated_l1_tables + allocated_l2_tables
				211	if data_file_name is None:
				212	total_allocated_clusters += allocated_data_clusters
				213
				214	# Clusters allocated for the refcount blocks and table
				215	allocated_refcount_blocks = math.ceil(total_allocated_clusters / refcounts_per_block)
				216	allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table)
				217
				218	# Now we have a problem because allocated_refcount_blocks and allocated_refcount_tables...
				219	# (a) increase total_allocated_clusters, and
				220	# (b) need to be recalculated when total_allocated_clusters is increased
				221	# So we need to repeat the calculation as long as the numbers change
				222	while True:
				223	new_total_allocated_clusters = total_allocated_clusters + allocated_refcount_tables + allocated_refcount_blocks
				224	new_allocated_refcount_blocks = math.ceil(new_total_allocated_clusters / refcounts_per_block)
				225	if new_allocated_refcount_blocks > allocated_refcount_blocks:
				226	allocated_refcount_blocks = new_allocated_refcount_blocks
				227	allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table)
				228	else:
				229	break
				230
				231	# Now that we have the final numbers we can update total_allocated_clusters
				232	total_allocated_clusters += allocated_refcount_tables + allocated_refcount_blocks
				233
				234	# At this point we have the exact number of clusters that the output
				235	# image is going to use so we can calculate all the offsets.
				236	current_cluster_idx = 1
				237
				238	refcount_table_offset = current_cluster_idx * cluster_size
				239	current_cluster_idx += allocated_refcount_tables
				240
				241	refcount_block_offset = current_cluster_idx * cluster_size
				242	current_cluster_idx += allocated_refcount_blocks
				243
				244	l1_table_offset = current_cluster_idx * cluster_size
				245	current_cluster_idx += allocated_l1_tables
				246
				247	l2_table_offset = current_cluster_idx * cluster_size
				248	current_cluster_idx += allocated_l2_tables
				249
				250	data_clusters_offset = current_cluster_idx * cluster_size
				251
				252	# Calculate some values used in the qcow2 header
				253	if allocated_l1_tables == 0:
				254	l1_table_offset = 0
				255
				256	hdr_cluster_bits = int(math.log2(cluster_size))
				257	hdr_refcount_bits = int(math.log2(refcount_bits))
				258	hdr_length = QCOW2_V3_HEADER_LENGTH
				259	hdr_incompat_features = 0
				260	if data_file_name is not None:
				261	hdr_incompat_features \|= 1 << QCOW2_INCOMPAT_DATA_FILE_BIT
				262	hdr_autoclear_features = 0
				263	if data_file_raw:
				264	hdr_autoclear_features \|= 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT
				265
				266	### Write qcow2 header
				267	cluster = bytearray(cluster_size)
				268	struct.pack_into(">4sIQIIQIIQQIIQQQQII", cluster, 0,
				269	b"QFI\xfb", # QCOW magic string
				270	3, # version
				271	0, # backing file offset
				272	0, # backing file sizes
				273	hdr_cluster_bits,
				274	disk_size,
				275	0, # encryption method
				276	l1_entries,
				277	l1_table_offset,
				278	refcount_table_offset,
				279	allocated_refcount_tables,
				280	0, # number of snapshots
				281	0, # snapshot table offset
				282	hdr_incompat_features,
				283	0, # compatible features
				284	hdr_autoclear_features,
				285	hdr_refcount_bits,
				286	hdr_length,
				287	)
				288
				289	write_features(cluster, hdr_length, data_file_name)
				290
				291	sys.stdout.buffer.write(cluster)
				292
				293	### Write refcount table
				294	cur_offset = refcount_block_offset
				295	remaining_refcount_table_entries = allocated_refcount_blocks # Each entry is a pointer to a refcount block
				296	while remaining_refcount_table_entries > 0:
				297	cluster = bytearray(cluster_size)
				298	to_write = min(remaining_refcount_table_entries, refcounts_per_table)
				299	remaining_refcount_table_entries -= to_write
				300	for idx in range(to_write):
				301	struct.pack_into(">Q", cluster, idx * 8, cur_offset)
				302	cur_offset += cluster_size
				303	sys.stdout.buffer.write(cluster)
				304
				305	### Write refcount blocks
				306	remaining_refcount_block_entries = total_allocated_clusters # One entry for each allocated cluster
				307	for tbl in range(allocated_refcount_blocks):
				308	cluster = bytearray(cluster_size)
				309	to_write = min(remaining_refcount_block_entries, refcounts_per_block)
				310	remaining_refcount_block_entries -= to_write
				311	# All refcount entries contain the number 1. The only difference
				312	# is their bit width, defined when the image is created.
				313	for idx in range(to_write):
				314	if refcount_bits == 64:
				315	struct.pack_into(">Q", cluster, idx * 8, 1)
				316	elif refcount_bits == 32:
				317	struct.pack_into(">L", cluster, idx * 4, 1)
				318	elif refcount_bits == 16:
				319	struct.pack_into(">H", cluster, idx * 2, 1)
				320	elif refcount_bits == 8:
				321	cluster[idx] = 1
				322	elif refcount_bits == 4:
				323	cluster[idx // 2] \|= 1 << ((idx % 2) * 4)
				324	elif refcount_bits == 2:
				325	cluster[idx // 4] \|= 1 << ((idx % 4) * 2)
				326	elif refcount_bits == 1:
				327	cluster[idx // 8] \|= 1 << (idx % 8)
				328	sys.stdout.buffer.write(cluster)
				329
				330	### Write L1 table
				331	cur_offset = l2_table_offset
				332	for tbl in range(allocated_l1_tables):
				333	cluster = bytearray(cluster_size)
				334	for idx in range(l1_entries_per_table):
				335	l1_idx = tbl * l1_entries_per_table + idx
				336	if bitmap_is_set(l1_bitmap, l1_idx):
				337	struct.pack_into(">Q", cluster, idx * 8, cur_offset \| QCOW_OFLAG_COPIED)
				338	cur_offset += cluster_size
				339	sys.stdout.buffer.write(cluster)
				340
				341	### Write L2 tables
				342	cur_offset = data_clusters_offset
				343	for tbl in range(l1_entries):
				344	# Skip the empty L2 tables. We can identify them because
				345	# there is no L1 entry pointing at them.
				346	if bitmap_is_set(l1_bitmap, tbl):
				347	cluster = bytearray(cluster_size)
				348	for idx in range(l2_entries_per_table):
				349	l2_idx = tbl * l2_entries_per_table + idx
				350	if bitmap_is_set(l2_bitmap, l2_idx):
				351	if data_file_name is None:
				352	struct.pack_into(">Q", cluster, idx * 8, cur_offset \| QCOW_OFLAG_COPIED)
				353	cur_offset += cluster_size
				354	else:
				355	struct.pack_into(">Q", cluster, idx * 8, (l2_idx * cluster_size) \| QCOW_OFLAG_COPIED)
				356	sys.stdout.buffer.write(cluster)
				357
				358	### Write data clusters
				359	if data_file_name is None:
				360	for idx in bitmap_iterator(l2_bitmap, total_data_clusters):
				361	cluster = os.pread(fd, cluster_size, cluster_size * idx)
				362	# If the last cluster is smaller than cluster_size pad it with zeroes
				363	if len(cluster) < cluster_size:
				364	cluster += bytes(cluster_size - len(cluster))
				365	sys.stdout.buffer.write(cluster)
				366
				367	if not data_file_raw:
				368	os.close(fd)
				369
				370
				371	def main():
				372	# Command-line arguments
				373	parser = argparse.ArgumentParser(
				374	description="This program converts a QEMU disk image to qcow2 "
				375	"and writes it to the standard output"
				376	)
				377	parser.add_argument("input_file", help="name of the input file")
				378	parser.add_argument(
				379	"-f",
				380	dest="input_format",
				381	metavar="input_format",
				382	help="format of the input file (default: raw)",
				383	default="raw",
				384	)
				385	parser.add_argument(
				386	"-c",
				387	dest="cluster_size",
				388	metavar="cluster_size",
				389	help=f"qcow2 cluster size (default: {QCOW2_DEFAULT_CLUSTER_SIZE})",
				390	default=QCOW2_DEFAULT_CLUSTER_SIZE,
				391	type=int,
				392	choices=[1 << x for x in range(9, 22)],
				393	)
				394	parser.add_argument(
				395	"-r",
				396	dest="refcount_bits",
				397	metavar="refcount_bits",
				398	help=f"width of the reference count entries (default: {QCOW2_DEFAULT_REFCOUNT_BITS})",
				399	default=QCOW2_DEFAULT_REFCOUNT_BITS,
				400	type=int,
				401	choices=[1 << x for x in range(7)],
				402	)
				403	parser.add_argument(
				404	"-d",
				405	dest="data_file",
				406	help="create an image with input_file as an external data file",
				407	action="store_true",
				408	)
				409	parser.add_argument(
				410	"-R",
				411	dest="data_file_raw",
				412	help="enable data_file_raw on the generated image (implies -d)",
				413	action="store_true",
				414	)
				415	args = parser.parse_args()
				416
				417	if args.data_file_raw:
				418	args.data_file = True
				419
				420	if not os.path.isfile(args.input_file):
				421	sys.exit(f"[Error] {args.input_file} does not exist or is not a regular file.")
				422
				423	if args.data_file and args.input_format != "raw":
				424	sys.exit("[Error] External data files can only be used with raw input images")
				425
				426	# A 512 byte header is too small for the data file name extension
				427	if args.data_file and args.cluster_size == 512:
				428	sys.exit("[Error] External data files require a larger cluster size")
				429
				430	if sys.stdout.isatty():
				431	sys.exit("[Error] Refusing to write to a tty. Try redirecting stdout.")
				432
				433	if args.data_file:
				434	data_file_name = args.input_file
				435	else:
				436	data_file_name = None
				437
				438	with get_input_as_raw_file(args.input_file, args.input_format) as raw_file:
				439	write_qcow2_content(
				440	raw_file,
				441	args.cluster_size,
				442	args.refcount_bits,
				443	data_file_name,
				444	args.data_file_raw,
				445	)
				446
				447
# Run the converter only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    main()