aboutsummaryrefslogtreecommitdiff
path: root/Documentation/DocBook
diff options
context:
space:
mode:
authorFathi Boudra <fathi.boudra@linaro.org>2013-04-28 09:33:08 +0300
committerFathi Boudra <fathi.boudra@linaro.org>2013-04-28 09:33:08 +0300
commit3b4bd47f8f4ed3aaf7c81c9b5d2d37ad79fadf4a (patch)
treeb9996006addfd7ae70a39672b76843b49aebc189 /Documentation/DocBook
downloadlinux-linaro-highbank-3b4bd47f8f4ed3aaf7c81c9b5d2d37ad79fadf4a.tar.gz
Imported Upstream version 3.9.0HEADupstream/3.9.0upstreammaster
Diffstat (limited to 'Documentation/DocBook')
-rw-r--r--Documentation/DocBook/80211.tmpl582
-rw-r--r--Documentation/DocBook/Makefile241
-rw-r--r--Documentation/DocBook/alsa-driver-api.tmpl109
-rw-r--r--Documentation/DocBook/debugobjects.tmpl441
-rw-r--r--Documentation/DocBook/device-drivers.tmpl466
-rw-r--r--Documentation/DocBook/deviceiobook.tmpl323
-rw-r--r--Documentation/DocBook/drm.tmpl2540
-rw-r--r--Documentation/DocBook/filesystems.tmpl426
-rw-r--r--Documentation/DocBook/gadget.tmpl793
-rw-r--r--Documentation/DocBook/genericirq.tmpl507
-rw-r--r--Documentation/DocBook/kernel-api.tmpl330
-rw-r--r--Documentation/DocBook/kernel-hacking.tmpl1320
-rw-r--r--Documentation/DocBook/kernel-locking.tmpl2146
-rw-r--r--Documentation/DocBook/kgdb.tmpl917
-rw-r--r--Documentation/DocBook/libata.tmpl1625
-rw-r--r--Documentation/DocBook/librs.tmpl289
-rw-r--r--Documentation/DocBook/lsm.tmpl265
-rw-r--r--Documentation/DocBook/media/Makefile388
-rw-r--r--Documentation/DocBook/media/bayer.png.b64171
-rw-r--r--Documentation/DocBook/media/constraints.png.b6459
-rw-r--r--Documentation/DocBook/media/crop.gif.b64105
-rw-r--r--Documentation/DocBook/media/dvb/audio.xml1314
-rw-r--r--Documentation/DocBook/media/dvb/ca.xml582
-rw-r--r--Documentation/DocBook/media/dvb/demux.xml1130
-rw-r--r--Documentation/DocBook/media/dvb/dvbapi.xml141
-rw-r--r--Documentation/DocBook/media/dvb/dvbproperty.xml1283
-rw-r--r--Documentation/DocBook/media/dvb/dvbstb.pdfbin0 -> 1881 bytes
-rw-r--r--Documentation/DocBook/media/dvb/examples.xml365
-rw-r--r--Documentation/DocBook/media/dvb/frontend.xml1548
-rw-r--r--Documentation/DocBook/media/dvb/intro.xml212
-rw-r--r--Documentation/DocBook/media/dvb/kdapi.xml2309
-rw-r--r--Documentation/DocBook/media/dvb/net.xml156
-rw-r--r--Documentation/DocBook/media/dvb/video.xml1968
-rw-r--r--Documentation/DocBook/media/dvbstb.png.b64398
-rw-r--r--Documentation/DocBook/media/fieldseq_bt.gif.b64447
-rw-r--r--Documentation/DocBook/media/fieldseq_tb.gif.b64445
-rw-r--r--Documentation/DocBook/media/nv12mt.gif.b6437
-rw-r--r--Documentation/DocBook/media/nv12mt_example.gif.b64121
-rw-r--r--Documentation/DocBook/media/pipeline.png.b64213
-rw-r--r--Documentation/DocBook/media/selection.png.b64206
-rw-r--r--Documentation/DocBook/media/v4l/biblio.xml269
-rw-r--r--Documentation/DocBook/media/v4l/capture.c.xml659
-rw-r--r--Documentation/DocBook/media/v4l/common.xml1214
-rw-r--r--Documentation/DocBook/media/v4l/compat.xml2640
-rw-r--r--Documentation/DocBook/media/v4l/controls.xml4690
-rw-r--r--Documentation/DocBook/media/v4l/crop.pdfbin0 -> 5846 bytes
-rw-r--r--Documentation/DocBook/media/v4l/dev-capture.xml110
-rw-r--r--Documentation/DocBook/media/v4l/dev-codec.xml18
-rw-r--r--Documentation/DocBook/media/v4l/dev-effect.xml17
-rw-r--r--Documentation/DocBook/media/v4l/dev-event.xml43
-rw-r--r--Documentation/DocBook/media/v4l/dev-osd.xml149
-rw-r--r--Documentation/DocBook/media/v4l/dev-output.xml106
-rw-r--r--Documentation/DocBook/media/v4l/dev-overlay.xml371
-rw-r--r--Documentation/DocBook/media/v4l/dev-radio.xml49
-rw-r--r--Documentation/DocBook/media/v4l/dev-raw-vbi.xml339
-rw-r--r--Documentation/DocBook/media/v4l/dev-rds.xml196
-rw-r--r--Documentation/DocBook/media/v4l/dev-sliced-vbi.xml699
-rw-r--r--Documentation/DocBook/media/v4l/dev-subdev.xml467
-rw-r--r--Documentation/DocBook/media/v4l/dev-teletext.xml29
-rw-r--r--Documentation/DocBook/media/v4l/driver.xml200
-rw-r--r--Documentation/DocBook/media/v4l/fdl-appendix.xml671
-rw-r--r--Documentation/DocBook/media/v4l/fieldseq_bt.pdfbin0 -> 9185 bytes
-rw-r--r--Documentation/DocBook/media/v4l/fieldseq_tb.pdfbin0 -> 9173 bytes
-rw-r--r--Documentation/DocBook/media/v4l/func-close.xml62
-rw-r--r--Documentation/DocBook/media/v4l/func-ioctl.xml71
-rw-r--r--Documentation/DocBook/media/v4l/func-mmap.xml183
-rw-r--r--Documentation/DocBook/media/v4l/func-munmap.xml76
-rw-r--r--Documentation/DocBook/media/v4l/func-open.xml113
-rw-r--r--Documentation/DocBook/media/v4l/func-poll.xml119
-rw-r--r--Documentation/DocBook/media/v4l/func-read.xml181
-rw-r--r--Documentation/DocBook/media/v4l/func-select.xml130
-rw-r--r--Documentation/DocBook/media/v4l/func-write.xml128
-rw-r--r--Documentation/DocBook/media/v4l/gen-errors.xml77
-rw-r--r--Documentation/DocBook/media/v4l/io.xml1481
-rw-r--r--Documentation/DocBook/media/v4l/keytable.c.xml172
-rw-r--r--Documentation/DocBook/media/v4l/libv4l.xml160
-rw-r--r--Documentation/DocBook/media/v4l/lirc_device_interface.xml253
-rw-r--r--Documentation/DocBook/media/v4l/media-controller.xml89
-rw-r--r--Documentation/DocBook/media/v4l/media-func-close.xml59
-rw-r--r--Documentation/DocBook/media/v4l/media-func-ioctl.xml73
-rw-r--r--Documentation/DocBook/media/v4l/media-func-open.xml94
-rw-r--r--Documentation/DocBook/media/v4l/media-ioc-device-info.xml132
-rw-r--r--Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml308
-rw-r--r--Documentation/DocBook/media/v4l/media-ioc-enum-links.xml207
-rw-r--r--Documentation/DocBook/media/v4l/media-ioc-setup-link.xml84
-rw-r--r--Documentation/DocBook/media/v4l/pipeline.pdfbin0 -> 20276 bytes
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-grey.xml62
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-m420.xml139
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv12.xml143
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv12m.xml153
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv12mt.xml66
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv16.xml166
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv24.xml121
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml935
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-packed-yuv.xml236
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sbggr16.xml83
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sbggr8.xml67
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sgbrg8.xml67
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sgrbg8.xml67
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb10.xml90
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml34
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb10dpcm8.xml28
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb12.xml90
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb8.xml67
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-uv8.xml62
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-uyvy.xml120
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-vyuy.xml120
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y10.xml79
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y10b.xml43
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y12.xml79
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y16.xml81
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y41p.xml149
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv410.xml133
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv411p.xml147
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv420.xml149
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv420m.xml154
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv422p.xml153
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuyv.xml120
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yvu420m.xml154
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yvyu.xml120
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt.xml1042
-rw-r--r--Documentation/DocBook/media/v4l/planar-apis.xml62
-rw-r--r--Documentation/DocBook/media/v4l/remote_controllers.xml177
-rw-r--r--Documentation/DocBook/media/v4l/selection-api.xml325
-rw-r--r--Documentation/DocBook/media/v4l/selections-common.xml164
-rw-r--r--Documentation/DocBook/media/v4l/subdev-formats.xml2519
-rw-r--r--Documentation/DocBook/media/v4l/subdev-image-processing-crop.dia614
-rw-r--r--Documentation/DocBook/media/v4l/subdev-image-processing-crop.svg63
-rw-r--r--Documentation/DocBook/media/v4l/subdev-image-processing-full.dia1588
-rw-r--r--Documentation/DocBook/media/v4l/subdev-image-processing-full.svg163
-rw-r--r--Documentation/DocBook/media/v4l/subdev-image-processing-scaling-multi-source.dia1152
-rw-r--r--Documentation/DocBook/media/v4l/subdev-image-processing-scaling-multi-source.svg116
-rw-r--r--Documentation/DocBook/media/v4l/v4l2.xml636
-rw-r--r--Documentation/DocBook/media/v4l/v4l2grab.c.xml164
-rw-r--r--Documentation/DocBook/media/v4l/vbi_525.pdfbin0 -> 3395 bytes
-rw-r--r--Documentation/DocBook/media/v4l/vbi_625.pdfbin0 -> 3683 bytes
-rw-r--r--Documentation/DocBook/media/v4l/vbi_hsync.pdfbin0 -> 7405 bytes
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-create-bufs.xml154
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-cropcap.xml167
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-ident.xml266
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml258
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-decoder-cmd.xml249
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-dqevent.xml277
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml205
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-encoder-cmd.xml189
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-dv-presets.xml240
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml125
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-fmt.xml159
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-frameintervals.xml259
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-framesizes.xml264
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml179
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enumaudio.xml76
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enumaudioout.xml79
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enuminput.xml313
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enumoutput.xml198
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enumstd.xml389
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-expbuf.xml208
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-audio.xml172
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-audioout.xml138
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-crop.xml124
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-ctrl.xml133
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-dv-preset.xml113
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-dv-timings.xml331
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-enc-index.xml189
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-ext-ctrls.xml378
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml463
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-fmt.xml197
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-frequency.xml147
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-input.xml83
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-jpegcomp.xml175
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-modulator.xml238
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-output.xml85
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-parm.xml314
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-priority.xml135
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-selection.xml241
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-sliced-vbi-cap.xml255
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-std.xml98
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-tuner.xml569
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-log-status.xml41
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-overlay.xml74
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml94
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-qbuf.xml192
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-query-dv-preset.xml78
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml110
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-querybuf.xml105
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-querycap.xml332
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-queryctrl.xml480
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-querystd.xml74
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-reqbufs.xml137
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-s-hw-freq-seek.xml184
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-streamon.xml112
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml152
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml154
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml119
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-crop.xml158
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-edid.xml152
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml183
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml141
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml165
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subscribe-event.xml206
-rw-r--r--Documentation/DocBook/media/vbi_525.gif.b6484
-rw-r--r--Documentation/DocBook/media/vbi_625.gif.b6490
-rw-r--r--Documentation/DocBook/media/vbi_hsync.gif.b6443
-rw-r--r--Documentation/DocBook/media_api.tmpl123
-rw-r--r--Documentation/DocBook/mtdnand.tmpl1296
-rw-r--r--Documentation/DocBook/networking.tmpl111
-rw-r--r--Documentation/DocBook/rapidio.tmpl158
-rw-r--r--Documentation/DocBook/regulator.tmpl304
-rw-r--r--Documentation/DocBook/s390-drivers.tmpl161
-rw-r--r--Documentation/DocBook/scsi.tmpl409
-rw-r--r--Documentation/DocBook/sh.tmpl105
-rw-r--r--Documentation/DocBook/stylesheet.xsl10
-rw-r--r--Documentation/DocBook/tracepoint.tmpl112
-rw-r--r--Documentation/DocBook/uio-howto.tmpl1050
-rw-r--r--Documentation/DocBook/usb.tmpl980
-rw-r--r--Documentation/DocBook/writing-an-alsa-driver.tmpl6200
-rw-r--r--Documentation/DocBook/writing_usb_driver.tmpl412
-rw-r--r--Documentation/DocBook/z8530book.tmpl371
218 files changed, 81662 insertions, 0 deletions
diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl
new file mode 100644
index 00000000..284ced7a
--- /dev/null
+++ b/Documentation/DocBook/80211.tmpl
@@ -0,0 +1,582 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE set PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+<set>
+ <setinfo>
+ <title>The 802.11 subsystems &ndash; for kernel developers</title>
+ <subtitle>
+ Explaining wireless 802.11 networking in the Linux kernel
+ </subtitle>
+
+ <copyright>
+ <year>2007-2009</year>
+ <holder>Johannes Berg</holder>
+ </copyright>
+
+ <authorgroup>
+ <author>
+ <firstname>Johannes</firstname>
+ <surname>Berg</surname>
+ <affiliation>
+ <address><email>johannes@sipsolutions.net</email></address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+ <para>
+ This documentation is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this documentation; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+
+ <abstract>
+ <para>
+ These books attempt to give a description of the
+ various subsystems that play a role in 802.11 wireless
+ networking in Linux. Since these books are for kernel
+ developers they attempts to document the structures
+ and functions used in the kernel as well as giving a
+ higher-level overview.
+ </para>
+ <para>
+ The reader is expected to be familiar with the 802.11
+ standard as published by the IEEE in 802.11-2007 (or
+ possibly later versions). References to this standard
+ will be given as "802.11-2007 8.1.5".
+ </para>
+ </abstract>
+ </setinfo>
+ <book id="cfg80211-developers-guide">
+ <bookinfo>
+ <title>The cfg80211 subsystem</title>
+
+ <abstract>
+!Pinclude/net/cfg80211.h Introduction
+ </abstract>
+ </bookinfo>
+ <chapter>
+ <title>Device registration</title>
+!Pinclude/net/cfg80211.h Device registration
+!Finclude/net/cfg80211.h ieee80211_band
+!Finclude/net/cfg80211.h ieee80211_channel_flags
+!Finclude/net/cfg80211.h ieee80211_channel
+!Finclude/net/cfg80211.h ieee80211_rate_flags
+!Finclude/net/cfg80211.h ieee80211_rate
+!Finclude/net/cfg80211.h ieee80211_sta_ht_cap
+!Finclude/net/cfg80211.h ieee80211_supported_band
+!Finclude/net/cfg80211.h cfg80211_signal_type
+!Finclude/net/cfg80211.h wiphy_params_flags
+!Finclude/net/cfg80211.h wiphy_flags
+!Finclude/net/cfg80211.h wiphy
+!Finclude/net/cfg80211.h wireless_dev
+!Finclude/net/cfg80211.h wiphy_new
+!Finclude/net/cfg80211.h wiphy_register
+!Finclude/net/cfg80211.h wiphy_unregister
+!Finclude/net/cfg80211.h wiphy_free
+
+!Finclude/net/cfg80211.h wiphy_name
+!Finclude/net/cfg80211.h wiphy_dev
+!Finclude/net/cfg80211.h wiphy_priv
+!Finclude/net/cfg80211.h priv_to_wiphy
+!Finclude/net/cfg80211.h set_wiphy_dev
+!Finclude/net/cfg80211.h wdev_priv
+ </chapter>
+ <chapter>
+ <title>Actions and configuration</title>
+!Pinclude/net/cfg80211.h Actions and configuration
+!Finclude/net/cfg80211.h cfg80211_ops
+!Finclude/net/cfg80211.h vif_params
+!Finclude/net/cfg80211.h key_params
+!Finclude/net/cfg80211.h survey_info_flags
+!Finclude/net/cfg80211.h survey_info
+!Finclude/net/cfg80211.h cfg80211_beacon_data
+!Finclude/net/cfg80211.h cfg80211_ap_settings
+!Finclude/net/cfg80211.h station_parameters
+!Finclude/net/cfg80211.h station_info_flags
+!Finclude/net/cfg80211.h rate_info_flags
+!Finclude/net/cfg80211.h rate_info
+!Finclude/net/cfg80211.h station_info
+!Finclude/net/cfg80211.h monitor_flags
+!Finclude/net/cfg80211.h mpath_info_flags
+!Finclude/net/cfg80211.h mpath_info
+!Finclude/net/cfg80211.h bss_parameters
+!Finclude/net/cfg80211.h ieee80211_txq_params
+!Finclude/net/cfg80211.h cfg80211_crypto_settings
+!Finclude/net/cfg80211.h cfg80211_auth_request
+!Finclude/net/cfg80211.h cfg80211_assoc_request
+!Finclude/net/cfg80211.h cfg80211_deauth_request
+!Finclude/net/cfg80211.h cfg80211_disassoc_request
+!Finclude/net/cfg80211.h cfg80211_ibss_params
+!Finclude/net/cfg80211.h cfg80211_connect_params
+!Finclude/net/cfg80211.h cfg80211_pmksa
+!Finclude/net/cfg80211.h cfg80211_send_rx_auth
+!Finclude/net/cfg80211.h cfg80211_send_auth_timeout
+!Finclude/net/cfg80211.h cfg80211_send_rx_assoc
+!Finclude/net/cfg80211.h cfg80211_send_assoc_timeout
+!Finclude/net/cfg80211.h cfg80211_send_deauth
+!Finclude/net/cfg80211.h __cfg80211_send_deauth
+!Finclude/net/cfg80211.h cfg80211_send_disassoc
+!Finclude/net/cfg80211.h __cfg80211_send_disassoc
+!Finclude/net/cfg80211.h cfg80211_ibss_joined
+!Finclude/net/cfg80211.h cfg80211_connect_result
+!Finclude/net/cfg80211.h cfg80211_roamed
+!Finclude/net/cfg80211.h cfg80211_disconnected
+!Finclude/net/cfg80211.h cfg80211_ready_on_channel
+!Finclude/net/cfg80211.h cfg80211_remain_on_channel_expired
+!Finclude/net/cfg80211.h cfg80211_new_sta
+!Finclude/net/cfg80211.h cfg80211_rx_mgmt
+!Finclude/net/cfg80211.h cfg80211_mgmt_tx_status
+!Finclude/net/cfg80211.h cfg80211_cqm_rssi_notify
+!Finclude/net/cfg80211.h cfg80211_cqm_pktloss_notify
+!Finclude/net/cfg80211.h cfg80211_michael_mic_failure
+ </chapter>
+ <chapter>
+ <title>Scanning and BSS list handling</title>
+!Pinclude/net/cfg80211.h Scanning and BSS list handling
+!Finclude/net/cfg80211.h cfg80211_ssid
+!Finclude/net/cfg80211.h cfg80211_scan_request
+!Finclude/net/cfg80211.h cfg80211_scan_done
+!Finclude/net/cfg80211.h cfg80211_bss
+!Finclude/net/cfg80211.h cfg80211_inform_bss_frame
+!Finclude/net/cfg80211.h cfg80211_inform_bss
+!Finclude/net/cfg80211.h cfg80211_unlink_bss
+!Finclude/net/cfg80211.h cfg80211_find_ie
+!Finclude/net/cfg80211.h ieee80211_bss_get_ie
+ </chapter>
+ <chapter>
+ <title>Utility functions</title>
+!Pinclude/net/cfg80211.h Utility functions
+!Finclude/net/cfg80211.h ieee80211_channel_to_frequency
+!Finclude/net/cfg80211.h ieee80211_frequency_to_channel
+!Finclude/net/cfg80211.h ieee80211_get_channel
+!Finclude/net/cfg80211.h ieee80211_get_response_rate
+!Finclude/net/cfg80211.h ieee80211_hdrlen
+!Finclude/net/cfg80211.h ieee80211_get_hdrlen_from_skb
+!Finclude/net/cfg80211.h ieee80211_radiotap_iterator
+ </chapter>
+ <chapter>
+ <title>Data path helpers</title>
+!Pinclude/net/cfg80211.h Data path helpers
+!Finclude/net/cfg80211.h ieee80211_data_to_8023
+!Finclude/net/cfg80211.h ieee80211_data_from_8023
+!Finclude/net/cfg80211.h ieee80211_amsdu_to_8023s
+!Finclude/net/cfg80211.h cfg80211_classify8021d
+ </chapter>
+ <chapter>
+ <title>Regulatory enforcement infrastructure</title>
+!Pinclude/net/cfg80211.h Regulatory enforcement infrastructure
+!Finclude/net/cfg80211.h regulatory_hint
+!Finclude/net/cfg80211.h wiphy_apply_custom_regulatory
+!Finclude/net/cfg80211.h freq_reg_info
+ </chapter>
+ <chapter>
+ <title>RFkill integration</title>
+!Pinclude/net/cfg80211.h RFkill integration
+!Finclude/net/cfg80211.h wiphy_rfkill_set_hw_state
+!Finclude/net/cfg80211.h wiphy_rfkill_start_polling
+!Finclude/net/cfg80211.h wiphy_rfkill_stop_polling
+ </chapter>
+ <chapter>
+ <title>Test mode</title>
+!Pinclude/net/cfg80211.h Test mode
+!Finclude/net/cfg80211.h cfg80211_testmode_alloc_reply_skb
+!Finclude/net/cfg80211.h cfg80211_testmode_reply
+!Finclude/net/cfg80211.h cfg80211_testmode_alloc_event_skb
+!Finclude/net/cfg80211.h cfg80211_testmode_event
+ </chapter>
+ </book>
+ <book id="mac80211-developers-guide">
+ <bookinfo>
+ <title>The mac80211 subsystem</title>
+ <abstract>
+!Pinclude/net/mac80211.h Introduction
+!Pinclude/net/mac80211.h Warning
+ </abstract>
+ </bookinfo>
+
+ <toc></toc>
+
+ <!--
+ Generally, this document shall be ordered by increasing complexity.
+ It is important to note that readers should be able to read only
+ the first few sections to get a working driver and only advanced
+ usage should require reading the full document.
+ -->
+
+ <part>
+ <title>The basic mac80211 driver interface</title>
+ <partintro>
+ <para>
+ You should read and understand the information contained
+ within this part of the book while implementing a driver.
+ In some chapters, advanced usage is noted, that may be
+ skipped at first.
+ </para>
+ <para>
+ This part of the book only covers station and monitor mode
+ functionality, additional information required to implement
+ the other modes is covered in the second part of the book.
+ </para>
+ </partintro>
+
+ <chapter id="basics">
+ <title>Basic hardware handling</title>
+ <para>TBD</para>
+ <para>
+ This chapter shall contain information on getting a hw
+ struct allocated and registered with mac80211.
+ </para>
+ <para>
+ Since it is required to allocate rates/modes before registering
+ a hw struct, this chapter shall also contain information on setting
+ up the rate/mode structs.
+ </para>
+ <para>
+ Additionally, some discussion about the callbacks and
+ the general programming model should be in here, including
+ the definition of ieee80211_ops which will be referred to
+ a lot.
+ </para>
+ <para>
+ Finally, a discussion of hardware capabilities should be done
+ with references to other parts of the book.
+ </para>
+ <!-- intentionally multiple !F lines to get proper order -->
+!Finclude/net/mac80211.h ieee80211_hw
+!Finclude/net/mac80211.h ieee80211_hw_flags
+!Finclude/net/mac80211.h SET_IEEE80211_DEV
+!Finclude/net/mac80211.h SET_IEEE80211_PERM_ADDR
+!Finclude/net/mac80211.h ieee80211_ops
+!Finclude/net/mac80211.h ieee80211_alloc_hw
+!Finclude/net/mac80211.h ieee80211_register_hw
+!Finclude/net/mac80211.h ieee80211_unregister_hw
+!Finclude/net/mac80211.h ieee80211_free_hw
+ </chapter>
+
+ <chapter id="phy-handling">
+ <title>PHY configuration</title>
+ <para>TBD</para>
+ <para>
+ This chapter should describe PHY handling including
+ start/stop callbacks and the various structures used.
+ </para>
+!Finclude/net/mac80211.h ieee80211_conf
+!Finclude/net/mac80211.h ieee80211_conf_flags
+ </chapter>
+
+ <chapter id="iface-handling">
+ <title>Virtual interfaces</title>
+ <para>TBD</para>
+ <para>
+ This chapter should describe virtual interface basics
+ that are relevant to the driver (VLANs, MGMT etc are not.)
+ It should explain the use of the add_iface/remove_iface
+ callbacks as well as the interface configuration callbacks.
+ </para>
+ <para>Things related to AP mode should be discussed there.</para>
+ <para>
+ Things related to supporting multiple interfaces should be
+ in the appropriate chapter, a BIG FAT note should be here about
+ this though and the recommendation to allow only a single
+ interface in STA mode at first!
+ </para>
+!Finclude/net/mac80211.h ieee80211_vif
+ </chapter>
+
+ <chapter id="rx-tx">
+ <title>Receive and transmit processing</title>
+ <sect1>
+ <title>what should be here</title>
+ <para>TBD</para>
+ <para>
+ This should describe the receive and transmit
+ paths in mac80211/the drivers as well as
+ transmit status handling.
+ </para>
+ </sect1>
+ <sect1>
+ <title>Frame format</title>
+!Pinclude/net/mac80211.h Frame format
+ </sect1>
+ <sect1>
+ <title>Packet alignment</title>
+!Pnet/mac80211/rx.c Packet alignment
+ </sect1>
+ <sect1>
+ <title>Calling into mac80211 from interrupts</title>
+!Pinclude/net/mac80211.h Calling mac80211 from interrupts
+ </sect1>
+ <sect1>
+ <title>functions/definitions</title>
+!Finclude/net/mac80211.h ieee80211_rx_status
+!Finclude/net/mac80211.h mac80211_rx_flags
+!Finclude/net/mac80211.h mac80211_tx_control_flags
+!Finclude/net/mac80211.h mac80211_rate_control_flags
+!Finclude/net/mac80211.h ieee80211_tx_rate
+!Finclude/net/mac80211.h ieee80211_tx_info
+!Finclude/net/mac80211.h ieee80211_tx_info_clear_status
+!Finclude/net/mac80211.h ieee80211_rx
+!Finclude/net/mac80211.h ieee80211_rx_ni
+!Finclude/net/mac80211.h ieee80211_rx_irqsafe
+!Finclude/net/mac80211.h ieee80211_tx_status
+!Finclude/net/mac80211.h ieee80211_tx_status_ni
+!Finclude/net/mac80211.h ieee80211_tx_status_irqsafe
+!Finclude/net/mac80211.h ieee80211_rts_get
+!Finclude/net/mac80211.h ieee80211_rts_duration
+!Finclude/net/mac80211.h ieee80211_ctstoself_get
+!Finclude/net/mac80211.h ieee80211_ctstoself_duration
+!Finclude/net/mac80211.h ieee80211_generic_frame_duration
+!Finclude/net/mac80211.h ieee80211_wake_queue
+!Finclude/net/mac80211.h ieee80211_stop_queue
+!Finclude/net/mac80211.h ieee80211_wake_queues
+!Finclude/net/mac80211.h ieee80211_stop_queues
+!Finclude/net/mac80211.h ieee80211_queue_stopped
+ </sect1>
+ </chapter>
+
+ <chapter id="filters">
+ <title>Frame filtering</title>
+!Pinclude/net/mac80211.h Frame filtering
+!Finclude/net/mac80211.h ieee80211_filter_flags
+ </chapter>
+
+ <chapter id="workqueue">
+ <title>The mac80211 workqueue</title>
+!Pinclude/net/mac80211.h mac80211 workqueue
+!Finclude/net/mac80211.h ieee80211_queue_work
+!Finclude/net/mac80211.h ieee80211_queue_delayed_work
+ </chapter>
+ </part>
+
+ <part id="advanced">
+ <title>Advanced driver interface</title>
+ <partintro>
+ <para>
+ Information contained within this part of the book is
+ of interest only for advanced interaction of mac80211
+ with drivers to exploit more hardware capabilities and
+ improve performance.
+ </para>
+ </partintro>
+
+ <chapter id="led-support">
+ <title>LED support</title>
+ <para>
+ Mac80211 supports various ways of blinking LEDs. Wherever possible,
+ device LEDs should be exposed as LED class devices and hooked up to
+ the appropriate trigger, which will then be triggered appropriately
+ by mac80211.
+ </para>
+!Finclude/net/mac80211.h ieee80211_get_tx_led_name
+!Finclude/net/mac80211.h ieee80211_get_rx_led_name
+!Finclude/net/mac80211.h ieee80211_get_assoc_led_name
+!Finclude/net/mac80211.h ieee80211_get_radio_led_name
+!Finclude/net/mac80211.h ieee80211_tpt_blink
+!Finclude/net/mac80211.h ieee80211_tpt_led_trigger_flags
+!Finclude/net/mac80211.h ieee80211_create_tpt_led_trigger
+ </chapter>
+
+ <chapter id="hardware-crypto-offload">
+ <title>Hardware crypto acceleration</title>
+!Pinclude/net/mac80211.h Hardware crypto acceleration
+ <!-- intentionally multiple !F lines to get proper order -->
+!Finclude/net/mac80211.h set_key_cmd
+!Finclude/net/mac80211.h ieee80211_key_conf
+!Finclude/net/mac80211.h ieee80211_key_flags
+!Finclude/net/mac80211.h ieee80211_get_tkip_p1k
+!Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv
+!Finclude/net/mac80211.h ieee80211_get_tkip_p2k
+ </chapter>
+
+ <chapter id="powersave">
+ <title>Powersave support</title>
+!Pinclude/net/mac80211.h Powersave support
+ </chapter>
+
+ <chapter id="beacon-filter">
+ <title>Beacon filter support</title>
+!Pinclude/net/mac80211.h Beacon filter support
+!Finclude/net/mac80211.h ieee80211_beacon_loss
+ </chapter>
+
+ <chapter id="qos">
+ <title>Multiple queues and QoS support</title>
+ <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_tx_queue_params
+ </chapter>
+
+ <chapter id="AP">
+ <title>Access point mode support</title>
+ <para>TBD</para>
+ <para>Some parts of the if_conf should be discussed here instead</para>
+ <para>
+ Insert notes about VLAN interfaces with hw crypto here or
+ in the hw crypto chapter.
+ </para>
+ <section id="ps-client">
+ <title>support for powersaving clients</title>
+!Pinclude/net/mac80211.h AP support for powersaving clients
+ </section>
+!Finclude/net/mac80211.h ieee80211_get_buffered_bc
+!Finclude/net/mac80211.h ieee80211_beacon_get
+!Finclude/net/mac80211.h ieee80211_sta_eosp_irqsafe
+!Finclude/net/mac80211.h ieee80211_frame_release_type
+!Finclude/net/mac80211.h ieee80211_sta_ps_transition
+!Finclude/net/mac80211.h ieee80211_sta_ps_transition_ni
+!Finclude/net/mac80211.h ieee80211_sta_set_buffered
+!Finclude/net/mac80211.h ieee80211_sta_block_awake
+ </chapter>
+
+ <chapter id="multi-iface">
+ <title>Supporting multiple virtual interfaces</title>
+ <para>TBD</para>
+ <para>
+ Note: WDS with identical MAC address should almost always be OK
+ </para>
+ <para>
+ Insert notes about having multiple virtual interfaces with
+ different MAC addresses here, note which configurations are
+ supported by mac80211, add notes about supporting hw crypto
+ with it.
+ </para>
+!Finclude/net/mac80211.h ieee80211_iterate_active_interfaces
+!Finclude/net/mac80211.h ieee80211_iterate_active_interfaces_atomic
+ </chapter>
+
+ <chapter id="station-handling">
+ <title>Station handling</title>
+ <para>TODO</para>
+!Finclude/net/mac80211.h ieee80211_sta
+!Finclude/net/mac80211.h sta_notify_cmd
+!Finclude/net/mac80211.h ieee80211_find_sta
+!Finclude/net/mac80211.h ieee80211_find_sta_by_ifaddr
+ </chapter>
+
+ <chapter id="hardware-scan-offload">
+ <title>Hardware scan offload</title>
+ <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_scan_completed
+ </chapter>
+
+ <chapter id="aggregation">
+ <title>Aggregation</title>
+ <sect1>
+ <title>TX A-MPDU aggregation</title>
+!Pnet/mac80211/agg-tx.c TX A-MPDU aggregation
+!Cnet/mac80211/agg-tx.c
+ </sect1>
+ <sect1>
+ <title>RX A-MPDU aggregation</title>
+!Pnet/mac80211/agg-rx.c RX A-MPDU aggregation
+!Cnet/mac80211/agg-rx.c
+ </sect1>
+!Finclude/net/mac80211.h ieee80211_ampdu_mlme_action
+ </chapter>
+
+ <chapter id="smps">
+ <title>Spatial Multiplexing Powersave (SMPS)</title>
+!Pinclude/net/mac80211.h Spatial multiplexing power save
+!Finclude/net/mac80211.h ieee80211_request_smps
+!Finclude/net/mac80211.h ieee80211_smps_mode
+ </chapter>
+ </part>
+
+ <part id="rate-control">
+ <title>Rate control interface</title>
+ <partintro>
+ <para>TBD</para>
+ <para>
+ This part of the book describes the rate control algorithm
+ interface and how it relates to mac80211 and drivers.
+ </para>
+ </partintro>
+ <chapter id="ratecontrol-api">
+ <title>Rate Control API</title>
+ <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_start_tx_ba_session
+!Finclude/net/mac80211.h ieee80211_start_tx_ba_cb_irqsafe
+!Finclude/net/mac80211.h ieee80211_stop_tx_ba_session
+!Finclude/net/mac80211.h ieee80211_stop_tx_ba_cb_irqsafe
+!Finclude/net/mac80211.h ieee80211_rate_control_changed
+!Finclude/net/mac80211.h ieee80211_tx_rate_control
+!Finclude/net/mac80211.h rate_control_send_low
+ </chapter>
+ </part>
+
+ <part id="internal">
+ <title>Internals</title>
+ <partintro>
+ <para>TBD</para>
+ <para>
+ This part of the book describes mac80211 internals.
+ </para>
+ </partintro>
+
+ <chapter id="key-handling">
+ <title>Key handling</title>
+ <sect1>
+ <title>Key handling basics</title>
+!Pnet/mac80211/key.c Key handling basics
+ </sect1>
+ <sect1>
+ <title>MORE TBD</title>
+ <para>TBD</para>
+ </sect1>
+ </chapter>
+
+ <chapter id="rx-processing">
+ <title>Receive processing</title>
+ <para>TBD</para>
+ </chapter>
+
+ <chapter id="tx-processing">
+ <title>Transmit processing</title>
+ <para>TBD</para>
+ </chapter>
+
+ <chapter id="sta-info">
+ <title>Station info handling</title>
+ <sect1>
+ <title>Programming information</title>
+!Fnet/mac80211/sta_info.h sta_info
+!Fnet/mac80211/sta_info.h ieee80211_sta_info_flags
+ </sect1>
+ <sect1>
+ <title>STA information lifetime rules</title>
+!Pnet/mac80211/sta_info.c STA information lifetime rules
+ </sect1>
+ </chapter>
+
+ <chapter id="aggregation-internals">
+ <title>Aggregation</title>
+!Fnet/mac80211/sta_info.h sta_ampdu_mlme
+!Fnet/mac80211/sta_info.h tid_ampdu_tx
+!Fnet/mac80211/sta_info.h tid_ampdu_rx
+ </chapter>
+
+ <chapter id="synchronisation">
+ <title>Synchronisation</title>
+ <para>TBD</para>
+ <para>Locking, lots of RCU</para>
+ </chapter>
+ </part>
+ </book>
+</set>
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
new file mode 100644
index 00000000..bc3d9f8c
--- /dev/null
+++ b/Documentation/DocBook/Makefile
@@ -0,0 +1,241 @@
+###
+# This makefile is used to generate the kernel documentation,
+# primarily based on in-line comments in various source files.
+# See Documentation/kernel-doc-nano-HOWTO.txt for instruction in how
+# to document the SRC - and how to read it.
+# To add a new book the only step required is to add the book to the
+# list of DOCBOOKS.
+
+DOCBOOKS := z8530book.xml device-drivers.xml \
+ kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
+ writing_usb_driver.xml networking.xml \
+ kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
+ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
+ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
+ 80211.xml debugobjects.xml sh.xml regulator.xml \
+ alsa-driver-api.xml writing-an-alsa-driver.xml \
+ tracepoint.xml drm.xml media_api.xml
+
+include $(srctree)/Documentation/DocBook/media/Makefile
+
+###
+# The build process is as follows (targets):
+# (xmldocs) [by docproc]
+# file.tmpl --> file.xml +--> file.ps (psdocs) [by db2ps or xmlto]
+# +--> file.pdf (pdfdocs) [by db2pdf or xmlto]
+# +--> DIR=file (htmldocs) [by xmlto]
+# +--> man/ (mandocs) [by xmlto]
+
+
+# for PDF and PS output you can choose between xmlto and docbook-utils tools
+PDF_METHOD = $(prefer-db2x)
+PS_METHOD = $(prefer-db2x)
+
+
+###
+# The targets that may be used.
+PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs cleandocs
+
+BOOKS := $(addprefix $(obj)/,$(DOCBOOKS))
+xmldocs: $(BOOKS)
+sgmldocs: xmldocs
+
+PS := $(patsubst %.xml, %.ps, $(BOOKS))
+psdocs: $(PS)
+
+PDF := $(patsubst %.xml, %.pdf, $(BOOKS))
+pdfdocs: $(PDF)
+
+HTML := $(sort $(patsubst %.xml, %.html, $(BOOKS)))
+htmldocs: $(HTML)
+ $(call build_main_index)
+ $(call build_images)
+ $(call install_media_images)
+
+MAN := $(patsubst %.xml, %.9, $(BOOKS))
+mandocs: $(MAN)
+
+installmandocs: mandocs
+ mkdir -p /usr/local/man/man9/
+ install Documentation/DocBook/man/*.9.gz /usr/local/man/man9/
+
+###
+#External programs used
+KERNELDOC = $(srctree)/scripts/kernel-doc
+DOCPROC = $(objtree)/scripts/docproc
+
+XMLTOFLAGS = -m $(srctree)/Documentation/DocBook/stylesheet.xsl
+XMLTOFLAGS += --skip-validation
+
+###
+# DOCPROC is used for two purposes:
+# 1) To generate a dependency list for a .tmpl file
+# 2) To preprocess a .tmpl file and call kernel-doc with
+# appropriate parameters.
+# The following rules are used to generate the .xml documentation
+# required to generate the final targets. (ps, pdf, html).
+quiet_cmd_docproc = DOCPROC $@
+ cmd_docproc = SRCTREE=$(srctree)/ $(DOCPROC) doc $< >$@
+define rule_docproc
+ set -e; \
+ $(if $($(quiet)cmd_$(1)),echo ' $($(quiet)cmd_$(1))';) \
+ $(cmd_$(1)); \
+ ( \
+ echo 'cmd_$@ := $(cmd_$(1))'; \
+ echo $@: `SRCTREE=$(srctree) $(DOCPROC) depend $<`; \
+ ) > $(dir $@).$(notdir $@).cmd
+endef
+
+%.xml: %.tmpl FORCE
+ $(call if_changed_rule,docproc)
+
+###
+#Read in all saved dependency files
+cmd_files := $(wildcard $(foreach f,$(BOOKS),$(dir $(f)).$(notdir $(f)).cmd))
+
+ifneq ($(cmd_files),)
+ include $(cmd_files)
+endif
+
+###
+# Changes in kernel-doc force a rebuild of all documentation
+$(BOOKS): $(KERNELDOC)
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+notfoundtemplate = echo "*** You have to install docbook-utils or xmlto ***"; \
+ exit 1
+db2xtemplate = db2TYPE -o $(dir $@) $<
+xmltotemplate = xmlto TYPE $(XMLTOFLAGS) -o $(dir $@) $<
+
+# determine which methods are available
+ifeq ($(shell which db2ps >/dev/null 2>&1 && echo found),found)
+ use-db2x = db2x
+ prefer-db2x = db2x
+else
+ use-db2x = notfound
+ prefer-db2x = $(use-xmlto)
+endif
+ifeq ($(shell which xmlto >/dev/null 2>&1 && echo found),found)
+ use-xmlto = xmlto
+ prefer-xmlto = xmlto
+else
+ use-xmlto = notfound
+ prefer-xmlto = $(use-db2x)
+endif
+
+# the commands, generated from the chosen template
+quiet_cmd_db2ps = PS $@
+ cmd_db2ps = $(subst TYPE,ps, $($(PS_METHOD)template))
+%.ps : %.xml
+ $(call cmd,db2ps)
+
+quiet_cmd_db2pdf = PDF $@
+ cmd_db2pdf = $(subst TYPE,pdf, $($(PDF_METHOD)template))
+%.pdf : %.xml
+ $(call cmd,db2pdf)
+
+
+index = index.html
+main_idx = Documentation/DocBook/$(index)
+build_main_index = rm -rf $(main_idx); \
+ echo '<h1>Linux Kernel HTML Documentation</h1>' >> $(main_idx) && \
+ echo '<h2>Kernel Version: $(KERNELVERSION)</h2>' >> $(main_idx) && \
+ cat $(HTML) >> $(main_idx)
+
+quiet_cmd_db2html = HTML $@
+ cmd_db2html = xmlto xhtml $(XMLTOFLAGS) -o $(patsubst %.html,%,$@) $< && \
+ echo '<a HREF="$(patsubst %.html,%,$(notdir $@))/index.html"> \
+ $(patsubst %.html,%,$(notdir $@))</a><p>' > $@
+
+%.html: %.xml
+ @(which xmlto > /dev/null 2>&1) || \
+ (echo "*** You need to install xmlto ***"; \
+ exit 1)
+ @rm -rf $@ $(patsubst %.html,%,$@)
+ $(call cmd,db2html)
+ @if [ ! -z "$(PNG-$(basename $(notdir $@)))" ]; then \
+ cp $(PNG-$(basename $(notdir $@))) $(patsubst %.html,%,$@); fi
+
+quiet_cmd_db2man = MAN $@
+ cmd_db2man = if grep -q refentry $<; then xmlto man $(XMLTOFLAGS) -o $(obj)/man $< ; gzip -f $(obj)/man/*.9; fi
+%.9 : %.xml
+ @(which xmlto > /dev/null 2>&1) || \
+ (echo "*** You need to install xmlto ***"; \
+ exit 1)
+ $(Q)mkdir -p $(obj)/man
+ $(call cmd,db2man)
+ @touch $@
+
+###
+# Rules to generate postscripts and PNG images from .fig format files
+quiet_cmd_fig2eps = FIG2EPS $@
+ cmd_fig2eps = fig2dev -Leps $< $@
+
+%.eps: %.fig
+ @(which fig2dev > /dev/null 2>&1) || \
+ (echo "*** You need to install transfig ***"; \
+ exit 1)
+ $(call cmd,fig2eps)
+
+quiet_cmd_fig2png = FIG2PNG $@
+ cmd_fig2png = fig2dev -Lpng $< $@
+
+%.png: %.fig
+ @(which fig2dev > /dev/null 2>&1) || \
+ (echo "*** You need to install transfig ***"; \
+ exit 1)
+ $(call cmd,fig2png)
+
+###
+# Rule to convert a .c file to inline XML documentation
+ gen_xml = :
+ quiet_gen_xml = echo ' GEN $@'
+silent_gen_xml = :
+%.xml: %.c
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>"; \
+ expand --tabs=8 < $< | \
+ sed -e "s/&/\\&amp;/g" \
+ -e "s/</\\&lt;/g" \
+ -e "s/>/\\&gt;/g"; \
+ echo "</programlisting>") > $@
+
+###
+# Help targets as used by the top-level makefile
+dochelp:
+ @echo ' Linux kernel internal documentation in different formats:'
+ @echo ' htmldocs - HTML'
+ @echo ' pdfdocs - PDF'
+ @echo ' psdocs - Postscript'
+ @echo ' xmldocs - XML DocBook'
+ @echo ' mandocs - man pages'
+ @echo ' installmandocs - install man pages generated by mandocs'
+ @echo ' cleandocs - clean all generated DocBook files'
+
+###
+# Temporary files left by various tools
+clean-files := $(DOCBOOKS) \
+ $(patsubst %.xml, %.dvi, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.aux, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.tex, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.log, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.out, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.ps, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.html, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.9, $(DOCBOOKS)) \
+ $(index)
+
+clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man
+
+cleandocs: cleanmediadocs
+ $(Q)rm -f $(call objectify, $(clean-files))
+ $(Q)rm -rf $(call objectify, $(clean-dirs))
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable se we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff --git a/Documentation/DocBook/alsa-driver-api.tmpl b/Documentation/DocBook/alsa-driver-api.tmpl
new file mode 100644
index 00000000..0230a96f
--- /dev/null
+++ b/Documentation/DocBook/alsa-driver-api.tmpl
@@ -0,0 +1,109 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<!-- ****************************************************** -->
+<!-- Header -->
+<!-- ****************************************************** -->
+<book id="ALSA-Driver-API">
+ <bookinfo>
+ <title>The ALSA Driver API</title>
+
+ <legalnotice>
+ <para>
+ This document is free; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ </para>
+
+ <para>
+ This document is distributed in the hope that it will be useful,
+ but <emphasis>WITHOUT ANY WARRANTY</emphasis>; without even the
+ implied warranty of <emphasis>MERCHANTABILITY or FITNESS FOR A
+ PARTICULAR PURPOSE</emphasis>. See the GNU General Public License
+ for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+ </legalnotice>
+
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter><title>Management of Cards and Devices</title>
+ <sect1><title>Card Management</title>
+!Esound/core/init.c
+ </sect1>
+ <sect1><title>Device Components</title>
+!Esound/core/device.c
+ </sect1>
+ <sect1><title>Module requests and Device File Entries</title>
+!Esound/core/sound.c
+ </sect1>
+ <sect1><title>Memory Management Helpers</title>
+!Esound/core/memory.c
+!Esound/core/memalloc.c
+ </sect1>
+ </chapter>
+ <chapter><title>PCM API</title>
+ <sect1><title>PCM Core</title>
+!Esound/core/pcm.c
+!Esound/core/pcm_lib.c
+!Esound/core/pcm_native.c
+ </sect1>
+ <sect1><title>PCM Format Helpers</title>
+!Esound/core/pcm_misc.c
+ </sect1>
+ <sect1><title>PCM Memory Management</title>
+!Esound/core/pcm_memory.c
+ </sect1>
+ </chapter>
+ <chapter><title>Control/Mixer API</title>
+ <sect1><title>General Control Interface</title>
+!Esound/core/control.c
+ </sect1>
+ <sect1><title>AC97 Codec API</title>
+!Esound/pci/ac97/ac97_codec.c
+!Esound/pci/ac97/ac97_pcm.c
+ </sect1>
+ <sect1><title>Virtual Master Control API</title>
+!Esound/core/vmaster.c
+!Iinclude/sound/control.h
+ </sect1>
+ </chapter>
+ <chapter><title>MIDI API</title>
+ <sect1><title>Raw MIDI API</title>
+!Esound/core/rawmidi.c
+ </sect1>
+ <sect1><title>MPU401-UART API</title>
+!Esound/drivers/mpu401/mpu401_uart.c
+ </sect1>
+ </chapter>
+ <chapter><title>Proc Info API</title>
+ <sect1><title>Proc Info Interface</title>
+!Esound/core/info.c
+ </sect1>
+ </chapter>
+ <chapter><title>Miscellaneous Functions</title>
+ <sect1><title>Hardware-Dependent Devices API</title>
+!Esound/core/hwdep.c
+ </sect1>
+ <sect1><title>Jack Abstraction Layer API</title>
+!Esound/core/jack.c
+ </sect1>
+ <sect1><title>ISA DMA Helpers</title>
+!Esound/core/isadma.c
+ </sect1>
+ <sect1><title>Other Helper Macros</title>
+!Iinclude/sound/core.h
+ </sect1>
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
new file mode 100644
index 00000000..24979f69
--- /dev/null
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -0,0 +1,441 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="debug-objects-guide">
+ <bookinfo>
+ <title>Debug objects life time</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Thomas</firstname>
+ <surname>Gleixner</surname>
+ <affiliation>
+ <address>
+ <email>tglx@linutronix.de</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2008</year>
+ <holder>Thomas Gleixner</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ debugobjects is a generic infrastructure to track the life time
+ of kernel objects and validate the operations on those.
+ </para>
+ <para>
+ debugobjects is useful to check for the following error patterns:
+ <itemizedlist>
+ <listitem><para>Activation of uninitialized objects</para></listitem>
+ <listitem><para>Initialization of active objects</para></listitem>
+ <listitem><para>Usage of freed/destroyed objects</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ debugobjects is not changing the data structure of the real
+ object so it can be compiled in with a minimal runtime impact
+ and enabled on demand with a kernel command line option.
+ </para>
+ </chapter>
+
+ <chapter id="howto">
+ <title>Howto use debugobjects</title>
+ <para>
+ A kernel subsystem needs to provide a data structure which
+ describes the object type and add calls into the debug code at
+ appropriate places. The data structure to describe the object
+ type needs at minimum the name of the object type. Optional
+ functions can and should be provided to fixup detected problems
+ so the kernel can continue to work and the debug information can
+ be retrieved from a live system instead of hard core debugging
+ with serial consoles and stack trace transcripts from the
+ monitor.
+ </para>
+ <para>
+ The debug calls provided by debugobjects are:
+ <itemizedlist>
+ <listitem><para>debug_object_init</para></listitem>
+ <listitem><para>debug_object_init_on_stack</para></listitem>
+ <listitem><para>debug_object_activate</para></listitem>
+ <listitem><para>debug_object_deactivate</para></listitem>
+ <listitem><para>debug_object_destroy</para></listitem>
+ <listitem><para>debug_object_free</para></listitem>
+ <listitem><para>debug_object_assert_init</para></listitem>
+ </itemizedlist>
+ Each of these functions takes the address of the real object and
+ a pointer to the object type specific debug description
+ structure.
+ </para>
+ <para>
+ Each detected error is reported in the statistics and a limited
+ number of errors are printk'ed including a full stack trace.
+ </para>
+ <para>
+ The statistics are available via /sys/kernel/debug/debug_objects/stats.
+ They provide information about the number of warnings and the
+ number of successful fixups along with information about the
+ usage of the internal tracking objects and the state of the
+ internal tracking objects pool.
+ </para>
+ </chapter>
+ <chapter id="debugfunctions">
+ <title>Debug functions</title>
+ <sect1 id="prototypes">
+ <title>Debug object function reference</title>
+!Elib/debugobjects.c
+ </sect1>
+ <sect1 id="debug_object_init">
+ <title>debug_object_init</title>
+ <para>
+ This function is called whenever the initialization function
+ of a real object is called.
+ </para>
+ <para>
+ When the real object is already tracked by debugobjects it is
+ checked, whether the object can be initialized. Initializing
+ is not allowed for active and destroyed objects. When
+ debugobjects detects an error, then it calls the fixup_init
+ function of the object type description structure if provided
+ by the caller. The fixup function can correct the problem
+ before the real initialization of the object happens. E.g. it
+ can deactivate an active object in order to prevent damage to
+ the subsystem.
+ </para>
+ <para>
+ When the real object is not yet tracked by debugobjects,
+ debugobjects allocates a tracker object for the real object
+ and sets the tracker object state to ODEBUG_STATE_INIT. It
+ verifies that the object is not on the callers stack. If it is
+ on the callers stack then a limited number of warnings
+ including a full stack trace is printk'ed. The calling code
+ must use debug_object_init_on_stack() and remove the object
+ before leaving the function which allocated it. See next
+ section.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_init_on_stack">
+ <title>debug_object_init_on_stack</title>
+ <para>
+ This function is called whenever the initialization function
+ of a real object which resides on the stack is called.
+ </para>
+ <para>
+ When the real object is already tracked by debugobjects it is
+ checked, whether the object can be initialized. Initializing
+ is not allowed for active and destroyed objects. When
+ debugobjects detects an error, then it calls the fixup_init
+ function of the object type description structure if provided
+ by the caller. The fixup function can correct the problem
+ before the real initialization of the object happens. E.g. it
+ can deactivate an active object in order to prevent damage to
+ the subsystem.
+ </para>
+ <para>
+ When the real object is not yet tracked by debugobjects
+ debugobjects allocates a tracker object for the real object
+ and sets the tracker object state to ODEBUG_STATE_INIT. It
+ verifies that the object is on the callers stack.
+ </para>
+ <para>
+ An object which is on the stack must be removed from the
+ tracker by calling debug_object_free() before the function
+ which allocates the object returns. Otherwise we keep track of
+ stale objects.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_activate">
+ <title>debug_object_activate</title>
+ <para>
+ This function is called whenever the activation function of a
+ real object is called.
+ </para>
+ <para>
+ When the real object is already tracked by debugobjects it is
+ checked, whether the object can be activated. Activating is
+ not allowed for active and destroyed objects. When
+ debugobjects detects an error, then it calls the
+ fixup_activate function of the object type description
+ structure if provided by the caller. The fixup function can
+ correct the problem before the real activation of the object
+ happens. E.g. it can deactivate an active object in order to
+ prevent damage to the subsystem.
+ </para>
+ <para>
+ When the real object is not yet tracked by debugobjects then
+ the fixup_activate function is called if available. This is
+ necessary to allow the legitimate activation of statically
+ allocated and initialized objects. The fixup function checks
+ whether the object is valid and calls the debug_objects_init()
+ function to initialize the tracking of this object.
+ </para>
+ <para>
+ When the activation is legitimate, then the state of the
+ associated tracker object is set to ODEBUG_STATE_ACTIVE.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_deactivate">
+ <title>debug_object_deactivate</title>
+ <para>
+ This function is called whenever the deactivation function of
+ a real object is called.
+ </para>
+ <para>
+ When the real object is tracked by debugobjects it is checked,
+ whether the object can be deactivated. Deactivating is not
+ allowed for untracked or destroyed objects.
+ </para>
+ <para>
+ When the deactivation is legitimate, then the state of the
+ associated tracker object is set to ODEBUG_STATE_INACTIVE.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_destroy">
+ <title>debug_object_destroy</title>
+ <para>
+ This function is called to mark an object destroyed. This is
+ useful to prevent the usage of invalid objects, which are
+ still available in memory: either statically allocated objects
+ or objects which are freed later.
+ </para>
+ <para>
+ When the real object is tracked by debugobjects it is checked,
+ whether the object can be destroyed. Destruction is not
+ allowed for active and destroyed objects. When debugobjects
+ detects an error, then it calls the fixup_destroy function of
+ the object type description structure if provided by the
+ caller. The fixup function can correct the problem before the
+ real destruction of the object happens. E.g. it can deactivate
+ an active object in order to prevent damage to the subsystem.
+ </para>
+ <para>
+ When the destruction is legitimate, then the state of the
+ associated tracker object is set to ODEBUG_STATE_DESTROYED.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_free">
+ <title>debug_object_free</title>
+ <para>
+ This function is called before an object is freed.
+ </para>
+ <para>
+ When the real object is tracked by debugobjects it is checked,
+ whether the object can be freed. Free is not allowed for
+ active objects. When debugobjects detects an error, then it
+ calls the fixup_free function of the object type description
+ structure if provided by the caller. The fixup function can
+ correct the problem before the real free of the object
+ happens. E.g. it can deactivate an active object in order to
+ prevent damage to the subsystem.
+ </para>
+ <para>
+ Note that debug_object_free removes the object from the
+ tracker. Later usage of the object is detected by the other
+ debug checks.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_assert_init">
+ <title>debug_object_assert_init</title>
+ <para>
+ This function is called to assert that an object has been
+ initialized.
+ </para>
+ <para>
+ When the real object is not tracked by debugobjects, it calls
+ fixup_assert_init of the object type description structure
+ provided by the caller, with the hardcoded object state
+ ODEBUG_NOT_AVAILABLE. The fixup function can correct the problem
+ by calling debug_object_init and other specific initializing
+ functions.
+ </para>
+ <para>
+ When the real object is already tracked by debugobjects it is
+ ignored.
+ </para>
+ </sect1>
+ </chapter>
+ <chapter id="fixupfunctions">
+ <title>Fixup functions</title>
+ <sect1 id="debug_obj_descr">
+ <title>Debug object type description structure</title>
+!Iinclude/linux/debugobjects.h
+ </sect1>
+ <sect1 id="fixup_init">
+ <title>fixup_init</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_init is detected. The function takes the
+ address of the object and the state which is currently
+ recorded in the tracker.
+ </para>
+ <para>
+ Called from debug_object_init when the object state is:
+ <itemizedlist>
+ <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ <para>
+ Note, that the function needs to call the debug_object_init()
+ function again, after the damage has been repaired in order to
+ keep the state consistent.
+ </para>
+ </sect1>
+
+ <sect1 id="fixup_activate">
+ <title>fixup_activate</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_activate is detected.
+ </para>
+ <para>
+ Called from debug_object_activate when the object state is:
+ <itemizedlist>
+ <listitem><para>ODEBUG_STATE_NOTAVAILABLE</para></listitem>
+ <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ <para>
+ Note that the function needs to call the debug_object_activate()
+ function again after the damage has been repaired in order to
+ keep the state consistent.
+ </para>
+ <para>
+ The activation of statically initialized objects is a special
+ case. When debug_object_activate() has no tracked object for
+ this object address then fixup_activate() is called with
+ object state ODEBUG_STATE_NOTAVAILABLE. The fixup function
+ needs to check whether this is a legitimate case of a
+ statically initialized object or not. In case it is it calls
+ debug_object_init() and debug_object_activate() to make the
+ object known to the tracker and marked active. In this case
+ the function should return 0 because this is not a real fixup.
+ </para>
+ </sect1>
+
+ <sect1 id="fixup_destroy">
+ <title>fixup_destroy</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_destroy is detected.
+ </para>
+ <para>
+ Called from debug_object_destroy when the object state is:
+ <itemizedlist>
+ <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ </sect1>
+ <sect1 id="fixup_free">
+ <title>fixup_free</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_free is detected. Further it can be called
+ from the debug checks in kfree/vfree, when an active object is
+ detected from the debug_check_no_obj_freed() sanity checks.
+ </para>
+ <para>
+ Called from debug_object_free() or debug_check_no_obj_freed()
+ when the object state is:
+ <itemizedlist>
+ <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ </sect1>
+ <sect1 id="fixup_assert_init">
+ <title>fixup_assert_init</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_assert_init is detected.
+ </para>
+ <para>
+ Called from debug_object_assert_init() with a hardcoded state
+ ODEBUG_STATE_NOTAVAILABLE when the object is not found in the
+ debug bucket.
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ <para>
+ Note, this function should make sure debug_object_init() is
+ called before returning.
+ </para>
+ <para>
+ The handling of statically initialized objects is a special
+ case. The fixup function should check if this is a legitimate
+ case of a statically initialized object or not. In this case only
+ debug_object_init() should be called to make the object known to
+ the tracker. Then the function should return 0 because this is not
+ a real fixup.
+ </para>
+ </sect1>
+ </chapter>
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ None (knock on wood).
+ </para>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
new file mode 100644
index 00000000..7514dbf0
--- /dev/null
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -0,0 +1,466 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxDriversAPI">
+ <bookinfo>
+ <title>Linux Device Drivers</title>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="Basics">
+ <title>Driver Basics</title>
+ <sect1><title>Driver Entry and Exit points</title>
+!Iinclude/linux/init.h
+ </sect1>
+
+ <sect1><title>Atomic and pointer manipulation</title>
+!Iarch/x86/include/asm/atomic.h
+ </sect1>
+
+ <sect1><title>Delaying, scheduling, and timer routines</title>
+!Iinclude/linux/sched.h
+!Ekernel/sched/core.c
+!Ikernel/sched/cpupri.c
+!Ikernel/sched/fair.c
+!Iinclude/linux/completion.h
+!Ekernel/timer.c
+ </sect1>
+ <sect1><title>Wait queues and Wake events</title>
+!Iinclude/linux/wait.h
+!Ekernel/wait.c
+ </sect1>
+ <sect1><title>High-resolution timers</title>
+!Iinclude/linux/ktime.h
+!Iinclude/linux/hrtimer.h
+!Ekernel/hrtimer.c
+ </sect1>
+ <sect1><title>Workqueues and Kevents</title>
+!Ekernel/workqueue.c
+ </sect1>
+ <sect1><title>Internal Functions</title>
+!Ikernel/exit.c
+!Ikernel/signal.c
+!Iinclude/linux/kthread.h
+!Ekernel/kthread.c
+ </sect1>
+
+ <sect1><title>Kernel objects manipulation</title>
+<!--
+X!Iinclude/linux/kobject.h
+-->
+!Elib/kobject.c
+ </sect1>
+
+ <sect1><title>Kernel utility functions</title>
+!Iinclude/linux/kernel.h
+!Ekernel/printk.c
+!Ekernel/panic.c
+!Ekernel/sys.c
+!Ekernel/rcupdate.c
+ </sect1>
+
+ <sect1><title>Device Resource Management</title>
+!Edrivers/base/devres.c
+ </sect1>
+
+ </chapter>
+
+ <chapter id="devdrivers">
+ <title>Device drivers infrastructure</title>
+ <sect1><title>The Basic Device Driver-Model Structures </title>
+!Iinclude/linux/device.h
+ </sect1>
+ <sect1><title>Device Drivers Base</title>
+!Idrivers/base/init.c
+!Edrivers/base/driver.c
+!Edrivers/base/core.c
+!Edrivers/base/syscore.c
+!Edrivers/base/class.c
+!Idrivers/base/node.c
+!Edrivers/base/firmware_class.c
+!Edrivers/base/transport_class.c
+<!-- Cannot be included, because
+ attribute_container_add_class_device_adapter
+ and attribute_container_classdev_to_container
+ exceed allowed 44 characters maximum
+X!Edrivers/base/attribute_container.c
+-->
+!Edrivers/base/dd.c
+<!--
+X!Edrivers/base/interface.c
+-->
+!Iinclude/linux/platform_device.h
+!Edrivers/base/platform.c
+!Edrivers/base/bus.c
+ </sect1>
+ <sect1><title>Device Drivers DMA Management</title>
+!Edrivers/base/dma-buf.c
+!Edrivers/base/dma-coherent.c
+!Edrivers/base/dma-mapping.c
+ </sect1>
+ <sect1><title>Device Drivers Power Management</title>
+!Edrivers/base/power/main.c
+ </sect1>
+ <sect1><title>Device Drivers ACPI Support</title>
+<!-- Internal functions only
+X!Edrivers/acpi/sleep/main.c
+X!Edrivers/acpi/sleep/wakeup.c
+X!Edrivers/acpi/motherboard.c
+X!Edrivers/acpi/bus.c
+-->
+!Edrivers/acpi/scan.c
+!Idrivers/acpi/scan.c
+<!-- No correct structured comments
+X!Edrivers/acpi/pci_bind.c
+-->
+ </sect1>
+ <sect1><title>Device drivers PnP support</title>
+!Idrivers/pnp/core.c
+<!-- No correct structured comments
+X!Edrivers/pnp/system.c
+ -->
+!Edrivers/pnp/card.c
+!Idrivers/pnp/driver.c
+!Edrivers/pnp/manager.c
+!Edrivers/pnp/support.c
+ </sect1>
+ <sect1><title>Userspace IO devices</title>
+!Edrivers/uio/uio.c
+!Iinclude/linux/uio_driver.h
+ </sect1>
+ </chapter>
+
+ <chapter id="parportdev">
+ <title>Parallel Port Devices</title>
+!Iinclude/linux/parport.h
+!Edrivers/parport/ieee1284.c
+!Edrivers/parport/share.c
+!Idrivers/parport/daisy.c
+ </chapter>
+
+ <chapter id="message_devices">
+ <title>Message-based devices</title>
+ <sect1><title>Fusion message devices</title>
+!Edrivers/message/fusion/mptbase.c
+!Idrivers/message/fusion/mptbase.c
+!Edrivers/message/fusion/mptscsih.c
+!Idrivers/message/fusion/mptscsih.c
+!Idrivers/message/fusion/mptctl.c
+!Idrivers/message/fusion/mptspi.c
+!Idrivers/message/fusion/mptfc.c
+!Idrivers/message/fusion/mptlan.c
+ </sect1>
+ <sect1><title>I2O message devices</title>
+!Iinclude/linux/i2o.h
+!Idrivers/message/i2o/core.h
+!Edrivers/message/i2o/iop.c
+!Idrivers/message/i2o/iop.c
+!Idrivers/message/i2o/config-osm.c
+!Edrivers/message/i2o/exec-osm.c
+!Idrivers/message/i2o/exec-osm.c
+!Idrivers/message/i2o/bus-osm.c
+!Edrivers/message/i2o/device.c
+!Idrivers/message/i2o/device.c
+!Idrivers/message/i2o/driver.c
+!Idrivers/message/i2o/pci.c
+!Idrivers/message/i2o/i2o_block.c
+!Idrivers/message/i2o/i2o_scsi.c
+!Idrivers/message/i2o/i2o_proc.c
+ </sect1>
+ </chapter>
+
+ <chapter id="snddev">
+ <title>Sound Devices</title>
+!Iinclude/sound/core.h
+!Esound/sound_core.c
+!Iinclude/sound/pcm.h
+!Esound/core/pcm.c
+!Esound/core/device.c
+!Esound/core/info.c
+!Esound/core/rawmidi.c
+!Esound/core/sound.c
+!Esound/core/memory.c
+!Esound/core/pcm_memory.c
+!Esound/core/init.c
+!Esound/core/isadma.c
+!Esound/core/control.c
+!Esound/core/pcm_lib.c
+!Esound/core/hwdep.c
+!Esound/core/pcm_native.c
+!Esound/core/memalloc.c
+<!-- FIXME: Removed for now since no structured comments in source
+X!Isound/sound_firmware.c
+-->
+ </chapter>
+
+ <chapter id="uart16x50">
+ <title>16x50 UART Driver</title>
+!Edrivers/tty/serial/serial_core.c
+!Edrivers/tty/serial/8250/8250.c
+ </chapter>
+
+ <chapter id="fbdev">
+ <title>Frame Buffer Library</title>
+
+ <para>
+ The frame buffer drivers depend heavily on four data structures.
+ These structures are declared in include/linux/fb.h. They are
+ fb_info, fb_var_screeninfo, fb_fix_screeninfo and fb_monospecs.
+ The last three can be made available to and from userland.
+ </para>
+
+ <para>
+ fb_info defines the current state of a particular video card.
+ Inside fb_info, there exists a fb_ops structure which is a
+ collection of needed functions to make fbdev and fbcon work.
+ fb_info is only visible to the kernel.
+ </para>
+
+ <para>
+ fb_var_screeninfo is used to describe the features of a video card
+ that are user defined. With fb_var_screeninfo, things such as
+ depth and the resolution may be defined.
+ </para>
+
+ <para>
+ The next structure is fb_fix_screeninfo. This defines the
+ properties of a card that are created when a mode is set and can't
+ be changed otherwise. A good example of this is the start of the
+ frame buffer memory. This "locks" the address of the frame buffer
+ memory, so that it cannot be changed or moved.
+ </para>
+
+ <para>
+ The last structure is fb_monospecs. In the old API, there was
+ little importance for fb_monospecs. This allowed for forbidden things
+ such as setting a mode of 800x600 on a fix frequency monitor. With
+ the new API, fb_monospecs prevents such things, and if used
+ correctly, can prevent a monitor from being cooked. fb_monospecs
+ will not be useful until kernels 2.5.x.
+ </para>
+
+ <sect1><title>Frame Buffer Memory</title>
+!Edrivers/video/fbmem.c
+ </sect1>
+<!--
+ <sect1><title>Frame Buffer Console</title>
+X!Edrivers/video/console/fbcon.c
+ </sect1>
+-->
+ <sect1><title>Frame Buffer Colormap</title>
+!Edrivers/video/fbcmap.c
+ </sect1>
+<!-- FIXME:
+ drivers/video/fbgen.c has no docs, which stuffs up the sgml. Comment
+ out until somebody adds docs. KAO
+ <sect1><title>Frame Buffer Generic Functions</title>
+X!Idrivers/video/fbgen.c
+ </sect1>
+KAO -->
+ <sect1><title>Frame Buffer Video Mode Database</title>
+!Idrivers/video/modedb.c
+!Edrivers/video/modedb.c
+ </sect1>
+ <sect1><title>Frame Buffer Macintosh Video Mode Database</title>
+!Edrivers/video/macmodes.c
+ </sect1>
+ <sect1><title>Frame Buffer Fonts</title>
+ <para>
+ Refer to the file drivers/video/console/fonts.c for more information.
+ </para>
+<!-- FIXME: Removed for now since no structured comments in source
+X!Idrivers/video/console/fonts.c
+-->
+ </sect1>
+ </chapter>
+
+ <chapter id="input_subsystem">
+ <title>Input Subsystem</title>
+ <sect1><title>Input core</title>
+!Iinclude/linux/input.h
+!Edrivers/input/input.c
+!Edrivers/input/ff-core.c
+!Edrivers/input/ff-memless.c
+ </sect1>
+ <sect1><title>Multitouch Library</title>
+!Iinclude/linux/input/mt.h
+!Edrivers/input/input-mt.c
+ </sect1>
+ <sect1><title>Polled input devices</title>
+!Iinclude/linux/input-polldev.h
+!Edrivers/input/input-polldev.c
+ </sect1>
+ <sect1><title>Matrix keyboars/keypads</title>
+!Iinclude/linux/input/matrix_keypad.h
+ </sect1>
+ <sect1><title>Sparse keymap support</title>
+!Iinclude/linux/input/sparse-keymap.h
+!Edrivers/input/sparse-keymap.c
+ </sect1>
+ </chapter>
+
+ <chapter id="spi">
+ <title>Serial Peripheral Interface (SPI)</title>
+ <para>
+ SPI is the "Serial Peripheral Interface", widely used with
+ embedded systems because it is a simple and efficient
+ interface: basically a multiplexed shift register.
+ Its three signal wires hold a clock (SCK, often in the range
+ of 1-20 MHz), a "Master Out, Slave In" (MOSI) data line, and
+ a "Master In, Slave Out" (MISO) data line.
+ SPI is a full duplex protocol; for each bit shifted out the
+ MOSI line (one per clock) another is shifted in on the MISO line.
+ Those bits are assembled into words of various sizes on the
+ way to and from system memory.
+ An additional chipselect line is usually active-low (nCS);
+ four signals are normally used for each peripheral, plus
+ sometimes an interrupt.
+ </para>
+ <para>
+ The SPI bus facilities listed here provide a generalized
+ interface to declare SPI busses and devices, manage them
+ according to the standard Linux driver model, and perform
+ input/output operations.
+ At this time, only "master" side interfaces are supported,
+ where Linux talks to SPI peripherals and does not implement
+ such a peripheral itself.
+ (Interfaces to support implementing SPI slaves would
+ necessarily look different.)
+ </para>
+ <para>
+ The programming interface is structured around two kinds of driver,
+ and two kinds of device.
+ A "Controller Driver" abstracts the controller hardware, which may
+ be as simple as a set of GPIO pins or as complex as a pair of FIFOs
+ connected to dual DMA engines on the other side of the SPI shift
+ register (maximizing throughput). Such drivers bridge between
+ whatever bus they sit on (often the platform bus) and SPI, and
+ expose the SPI side of their device as a
+ <structname>struct spi_master</structname>.
+ SPI devices are children of that master, represented as a
+ <structname>struct spi_device</structname> and manufactured from
+ <structname>struct spi_board_info</structname> descriptors which
+ are usually provided by board-specific initialization code.
+ A <structname>struct spi_driver</structname> is called a
+ "Protocol Driver", and is bound to a spi_device using normal
+ driver model calls.
+ </para>
+ <para>
+ The I/O model is a set of queued messages. Protocol drivers
+ submit one or more <structname>struct spi_message</structname>
+ objects, which are processed and completed asynchronously.
+ (There are synchronous wrappers, however.) Messages are
+ built from one or more <structname>struct spi_transfer</structname>
+ objects, each of which wraps a full duplex SPI transfer.
+ A variety of protocol tweaking options are needed, because
+ different chips adopt very different policies for how they
+ use the bits transferred with SPI.
+ </para>
+!Iinclude/linux/spi/spi.h
+!Fdrivers/spi/spi.c spi_register_board_info
+!Edrivers/spi/spi.c
+ </chapter>
+
+ <chapter id="i2c">
+ <title>I<superscript>2</superscript>C and SMBus Subsystem</title>
+
+ <para>
+ I<superscript>2</superscript>C (or without fancy typography, "I2C")
+ is an acronym for the "Inter-IC" bus, a simple bus protocol which is
+ widely used where low data rate communications suffice.
+ Since it's also a licensed trademark, some vendors use another
+ name (such as "Two-Wire Interface", TWI) for the same bus.
+ I2C only needs two signals (SCL for clock, SDA for data), conserving
+ board real estate and minimizing signal quality issues.
+ Most I2C devices use seven bit addresses, and bus speeds of up
+ to 400 kHz; there's a high speed extension (3.4 MHz) that's not yet
+ found wide use.
+ I2C is a multi-master bus; open drain signaling is used to
+ arbitrate between masters, as well as to handshake and to
+ synchronize clocks from slower clients.
+ </para>
+
+ <para>
+ The Linux I2C programming interfaces support only the master
+ side of bus interactions, not the slave side.
+ The programming interface is structured around two kinds of driver,
+ and two kinds of device.
+ An I2C "Adapter Driver" abstracts the controller hardware; it binds
+ to a physical device (perhaps a PCI device or platform_device) and
+ exposes a <structname>struct i2c_adapter</structname> representing
+ each I2C bus segment it manages.
+ On each I2C bus segment will be I2C devices represented by a
+ <structname>struct i2c_client</structname>. Those devices will
+ be bound to a <structname>struct i2c_driver</structname>,
+ which should follow the standard Linux driver model.
+ (At this writing, a legacy model is more widely used.)
+ There are functions to perform various I2C protocol operations; at
+ this writing all such functions are usable only from task context.
+ </para>
+
+ <para>
+ The System Management Bus (SMBus) is a sibling protocol. Most SMBus
+ systems are also I2C conformant. The electrical constraints are
+ tighter for SMBus, and it standardizes particular protocol messages
+ and idioms. Controllers that support I2C can also support most
+ SMBus operations, but SMBus controllers don't support all the protocol
+ options that an I2C controller will.
+ There are functions to perform various SMBus protocol operations,
+ either using I2C primitives or by issuing SMBus commands to
+ i2c_adapter devices which don't support those I2C operations.
+ </para>
+
+!Iinclude/linux/i2c.h
+!Fdrivers/i2c/i2c-boardinfo.c i2c_register_board_info
+!Edrivers/i2c/i2c-core.c
+ </chapter>
+
+ <chapter id="hsi">
+ <title>High Speed Synchronous Serial Interface (HSI)</title>
+
+ <para>
+ High Speed Synchronous Serial Interface (HSI) is a
+ serial interface mainly used for connecting application
+ engines (APE) with cellular modem engines (CMT) in cellular
+ handsets.
+
+ HSI provides multiplexing for up to 16 logical channels,
+ low-latency and full duplex communication.
+ </para>
+
+!Iinclude/linux/hsi/hsi.h
+!Edrivers/hsi/hsi.c
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/deviceiobook.tmpl b/Documentation/DocBook/deviceiobook.tmpl
new file mode 100644
index 00000000..54199a0d
--- /dev/null
+++ b/Documentation/DocBook/deviceiobook.tmpl
@@ -0,0 +1,323 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="DoingIO">
+ <bookinfo>
+ <title>Bus-Independent Device Accesses</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Matthew</firstname>
+ <surname>Wilcox</surname>
+ <affiliation>
+ <address>
+ <email>matthew@wil.cx</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <authorgroup>
+ <author>
+ <firstname>Alan</firstname>
+ <surname>Cox</surname>
+ <affiliation>
+ <address>
+ <email>alan@lxorguk.ukuu.org.uk</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2001</year>
+ <holder>Matthew Wilcox</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ Linux provides an API which abstracts performing IO across all busses
+ and devices, allowing device drivers to be written independently of
+ bus type.
+ </para>
+ </chapter>
+
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ None.
+ </para>
+ </chapter>
+
+ <chapter id="mmio">
+ <title>Memory Mapped IO</title>
+ <sect1 id="getting_access_to_the_device">
+ <title>Getting Access to the Device</title>
+ <para>
+ The most widely supported form of IO is memory mapped IO.
+ That is, a part of the CPU's address space is interpreted
+ not as accesses to memory, but as accesses to a device. Some
+ architectures define devices to be at a fixed address, but most
+ have some method of discovering devices. The PCI bus walk is a
+ good example of such a scheme. This document does not cover how
+ to receive such an address, but assumes you are starting with one.
+ Physical addresses are of type unsigned long.
+ </para>
+
+ <para>
+ This address should not be used directly. Instead, to get an
+ address suitable for passing to the accessor functions described
+ below, you should call <function>ioremap</function>.
+ An address suitable for accessing the device will be returned to you.
+ </para>
+
+ <para>
+ After you've finished using the device (say, in your module's
+ exit routine), call <function>iounmap</function> in order to return
+ the address space to the kernel. Most architectures allocate new
+ address space each time you call <function>ioremap</function>, and
+ they can run out unless you call <function>iounmap</function>.
+ </para>
+ </sect1>
+
+ <sect1 id="accessing_the_device">
+ <title>Accessing the device</title>
+ <para>
+ The part of the interface most used by drivers is reading and
+ writing memory-mapped registers on the device. Linux provides
+ interfaces to read and write 8-bit, 16-bit, 32-bit and 64-bit
+ quantities. Due to a historical accident, these are named byte,
+ word, long and quad accesses. Both read and write accesses are
+ supported; there is no prefetch support at this time.
+ </para>
+
+ <para>
+ The functions are named <function>readb</function>,
+ <function>readw</function>, <function>readl</function>,
+ <function>readq</function>, <function>readb_relaxed</function>,
+ <function>readw_relaxed</function>, <function>readl_relaxed</function>,
+ <function>readq_relaxed</function>, <function>writeb</function>,
+ <function>writew</function>, <function>writel</function> and
+ <function>writeq</function>.
+ </para>
+
+ <para>
+ Some devices (such as framebuffers) would like to use larger
+ transfers than 8 bytes at a time. For these devices, the
+ <function>memcpy_toio</function>, <function>memcpy_fromio</function>
+ and <function>memset_io</function> functions are provided.
+ Do not use memset or memcpy on IO addresses; they
+ are not guaranteed to copy data in order.
+ </para>
+
+ <para>
+ The read and write functions are defined to be ordered. That is the
+ compiler is not permitted to reorder the I/O sequence. When the
+ ordering can be compiler optimised, you can use <function>
+ __readb</function> and friends to indicate the relaxed ordering. Use
+ this with care.
+ </para>
+
+ <para>
+ While the basic functions are defined to be synchronous with respect
+ to each other and ordered with respect to each other the busses the
+ devices sit on may themselves have asynchronicity. In particular many
+ authors are burned by the fact that PCI bus writes are posted
+ asynchronously. A driver author must issue a read from the same
+ device to ensure that writes have occurred in the specific cases the
+ author cares. This kind of property cannot be hidden from driver
+ writers in the API. In some cases, the read used to flush the device
+ may be expected to fail (if the card is resetting, for example). In
+ that case, the read should be done from config space, which is
+ guaranteed to soft-fail if the card doesn't respond.
+ </para>
+
+ <para>
+ The following is an example of flushing a write to a device when
+ the driver would like to ensure the write's effects are visible prior
+ to continuing execution.
+ </para>
+
+<programlisting>
+static inline void
+qla1280_disable_intrs(struct scsi_qla_host *ha)
+{
+ struct device_reg *reg;
+
+ reg = ha->iobase;
+ /* disable risc and host interrupts */
+ WRT_REG_WORD(&amp;reg->ictrl, 0);
+ /*
+ * The following read will ensure that the above write
+ * has been received by the device before we return from this
+ * function.
+ */
+ RD_REG_WORD(&amp;reg->ictrl);
+ ha->flags.ints_enabled = 0;
+}
+</programlisting>
+
+ <para>
+ In addition to write posting, on some large multiprocessing systems
+ (e.g. SGI Challenge, Origin and Altix machines) posted writes won't
+ be strongly ordered coming from different CPUs. Thus it's important
+ to properly protect parts of your driver that do memory-mapped writes
+ with locks and use the <function>mmiowb</function> to make sure they
+ arrive in the order intended. Issuing a regular <function>readX
+ </function> will also ensure write ordering, but should only be used
+ when the driver has to be sure that the write has actually arrived
+ at the device (not that it's simply ordered with respect to other
+ writes), since a full <function>readX</function> is a relatively
+ expensive operation.
+ </para>
+
+ <para>
+ Generally, one should use <function>mmiowb</function> prior to
+ releasing a spinlock that protects regions using <function>writeb
+ </function> or similar functions that aren't surrounded by <function>
+ readb</function> calls, which will ensure ordering and flushing. The
+ following pseudocode illustrates what might occur if write ordering
+ isn't guaranteed via <function>mmiowb</function> or one of the
+ <function>readX</function> functions.
+ </para>
+
+<programlisting>
+CPU A: spin_lock_irqsave(&amp;dev_lock, flags)
+CPU A: ...
+CPU A: writel(newval, ring_ptr);
+CPU A: spin_unlock_irqrestore(&amp;dev_lock, flags)
+ ...
+CPU B: spin_lock_irqsave(&amp;dev_lock, flags)
+CPU B: writel(newval2, ring_ptr);
+CPU B: ...
+CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags)
+</programlisting>
+
+ <para>
+ In the case above, newval2 could be written to ring_ptr before
+ newval. Fixing it is easy though:
+ </para>
+
+<programlisting>
+CPU A: spin_lock_irqsave(&amp;dev_lock, flags)
+CPU A: ...
+CPU A: writel(newval, ring_ptr);
+CPU A: mmiowb(); /* ensure no other writes beat us to the device */
+CPU A: spin_unlock_irqrestore(&amp;dev_lock, flags)
+ ...
+CPU B: spin_lock_irqsave(&amp;dev_lock, flags)
+CPU B: writel(newval2, ring_ptr);
+CPU B: ...
+CPU B: mmiowb();
+CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags)
+</programlisting>
+
+ <para>
+ See tg3.c for a real world example of how to use <function>mmiowb
+ </function>
+ </para>
+
+ <para>
+ PCI ordering rules also guarantee that PIO read responses arrive
+ after any outstanding DMA writes from that bus, since for some devices
+ the result of a <function>readb</function> call may signal to the
+ driver that a DMA transaction is complete. In many cases, however,
+ the driver may want to indicate that the next
+ <function>readb</function> call has no relation to any previous DMA
+ writes performed by the device. The driver can use
+ <function>readb_relaxed</function> for these cases, although only
+ some platforms will honor the relaxed semantics. Using the relaxed
+ read functions will provide significant performance benefits on
+ platforms that support it. The qla2xxx driver provides examples
+ of how to use <function>readX_relaxed</function>. In many cases,
+ a majority of the driver's <function>readX</function> calls can
+ safely be converted to <function>readX_relaxed</function> calls, since
+ only a few will indicate or depend on DMA completion.
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="port_space_accesses">
+ <title>Port Space Accesses</title>
+ <sect1 id="port_space_explained">
+ <title>Port Space Explained</title>
+
+ <para>
+ Another form of IO commonly supported is Port Space. This is a
+ range of addresses separate to the normal memory address space.
+ Access to these addresses is generally not as fast as accesses
+ to the memory mapped addresses, and it also has a potentially
+ smaller address space.
+ </para>
+
+ <para>
+ Unlike memory mapped IO, no preparation is required
+ to access port space.
+ </para>
+
+ </sect1>
+ <sect1 id="accessing_port_space">
+ <title>Accessing Port Space</title>
+ <para>
+ Accesses to this space are provided through a set of functions
+ which allow 8-bit, 16-bit and 32-bit accesses; also
+ known as byte, word and long. These functions are
+ <function>inb</function>, <function>inw</function>,
+ <function>inl</function>, <function>outb</function>,
+ <function>outw</function> and <function>outl</function>.
+ </para>
+
+ <para>
+ Some variants are provided for these functions. Some devices
+ require that accesses to their ports are slowed down. This
+ functionality is provided by appending a <function>_p</function>
+ to the end of the function. There are also equivalents to memcpy.
+ The <function>ins</function> and <function>outs</function>
+ functions copy bytes, words or longs to the given port.
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="pubfunctions">
+ <title>Public Functions Provided</title>
+!Iarch/x86/include/asm/io.h
+!Elib/pci_iomap.c
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
new file mode 100644
index 00000000..f9df3b87
--- /dev/null
+++ b/Documentation/DocBook/drm.tmpl
@@ -0,0 +1,2540 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="drmDevelopersGuide">
+ <bookinfo>
+ <title>Linux DRM Developer's Guide</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Jesse</firstname>
+ <surname>Barnes</surname>
+ <contrib>Initial version</contrib>
+ <affiliation>
+ <orgname>Intel Corporation</orgname>
+ <address>
+ <email>jesse.barnes@intel.com</email>
+ </address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Laurent</firstname>
+ <surname>Pinchart</surname>
+ <contrib>Driver internals</contrib>
+ <affiliation>
+ <orgname>Ideas on board SPRL</orgname>
+ <address>
+ <email>laurent.pinchart@ideasonboard.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2008-2009</year>
+ <year>2012</year>
+ <holder>Intel Corporation</holder>
+ <holder>Laurent Pinchart</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ The contents of this file may be used under the terms of the GNU
+ General Public License version 2 (the "GPL") as distributed in
+ the kernel source COPYING file.
+ </para>
+ </legalnotice>
+
+ <revhistory>
+ <!-- Put document revisions here, newest first. -->
+ <revision>
+ <revnumber>1.0</revnumber>
+ <date>2012-07-13</date>
+ <authorinitials>LP</authorinitials>
+ <revremark>Added extensive documentation about driver internals.
+ </revremark>
+ </revision>
+ </revhistory>
+ </bookinfo>
+
+<toc></toc>
+
+ <!-- Introduction -->
+
+ <chapter id="drmIntroduction">
+ <title>Introduction</title>
+ <para>
+ The Linux DRM layer contains code intended to support the needs
+ of complex graphics devices, usually containing programmable
+ pipelines well suited to 3D graphics acceleration. Graphics
+ drivers in the kernel may make use of DRM functions to make
+ tasks like memory management, interrupt handling and DMA easier,
+ and provide a uniform interface to applications.
+ </para>
+ <para>
+ A note on versions: this guide covers features found in the DRM
+ tree, including the TTM memory manager, output configuration and
+ mode setting, and the new vblank internals, in addition to all
+ the regular features found in current kernels.
+ </para>
+ <para>
+ [Insert diagram of typical DRM stack here]
+ </para>
+ </chapter>
+
+ <!-- Internals -->
+
+ <chapter id="drmInternals">
+ <title>DRM Internals</title>
+ <para>
+ This chapter documents DRM internals relevant to driver authors
+ and developers working to add support for the latest features to
+ existing drivers.
+ </para>
+ <para>
+ First, we go over some typical driver initialization
+ requirements, like setting up command buffers, creating an
+ initial output configuration, and initializing core services.
+ Subsequent sections cover core internals in more detail,
+ providing implementation notes and examples.
+ </para>
+ <para>
+ The DRM layer provides several services to graphics drivers,
+ many of them driven by the application interfaces it provides
+ through libdrm, the library that wraps most of the DRM ioctls.
+ These include vblank event handling, memory
+ management, output management, framebuffer management, command
+ submission &amp; fencing, suspend/resume support, and DMA
+ services.
+ </para>
+
+ <!-- Internals: driver init -->
+
+ <sect1>
+ <title>Driver Initialization</title>
+ <para>
+ At the core of every DRM driver is a <structname>drm_driver</structname>
+ structure. Drivers typically statically initialize a drm_driver structure,
+ and then pass it to one of the <function>drm_*_init()</function> functions
+ to register it with the DRM subsystem.
+ </para>
+ <para>
+ The <structname>drm_driver</structname> structure contains static
+ information that describes the driver and features it supports, and
+ pointers to methods that the DRM core will call to implement the DRM API.
+ We will first go through the <structname>drm_driver</structname> static
+ information fields, and will then describe individual operations in
+ details as they get used in later sections.
+ </para>
+ <sect2>
+ <title>Driver Information</title>
+ <sect3>
+ <title>Driver Features</title>
+ <para>
+ Drivers inform the DRM core about their requirements and supported
+ features by setting appropriate flags in the
+ <structfield>driver_features</structfield> field. Since those flags
+ influence the DRM core behaviour since registration time, most of them
+ must be set to registering the <structname>drm_driver</structname>
+ instance.
+ </para>
+ <synopsis>u32 driver_features;</synopsis>
+ <variablelist>
+ <title>Driver Feature Flags</title>
+ <varlistentry>
+ <term>DRIVER_USE_AGP</term>
+ <listitem><para>
+ Driver uses AGP interface, the DRM core will manage AGP resources.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_REQUIRE_AGP</term>
+ <listitem><para>
+ Driver needs AGP interface to function. AGP initialization failure
+ will become a fatal error.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_USE_MTRR</term>
+ <listitem><para>
+ Driver uses MTRR interface for mapping memory, the DRM core will
+ manage MTRR resources. Deprecated.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_PCI_DMA</term>
+ <listitem><para>
+ Driver is capable of PCI DMA, mapping of PCI DMA buffers to
+ userspace will be enabled. Deprecated.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_SG</term>
+ <listitem><para>
+ Driver can perform scatter/gather DMA, allocation and mapping of
+ scatter/gather buffers will be enabled. Deprecated.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_HAVE_DMA</term>
+ <listitem><para>
+ Driver supports DMA, the userspace DMA API will be supported.
+ Deprecated.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_HAVE_IRQ</term><term>DRIVER_IRQ_SHARED</term>
+ <listitem><para>
+ DRIVER_HAVE_IRQ indicates whether the driver has an IRQ handler. The
+ DRM core will automatically register an interrupt handler when the
+ flag is set. DRIVER_IRQ_SHARED indicates whether the device &amp;
+ handler support shared IRQs (note that this is required of PCI
+ drivers).
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_IRQ_VBL</term>
+ <listitem><para>Unused. Deprecated.</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_DMA_QUEUE</term>
+ <listitem><para>
+ Should be set if the driver queues DMA requests and completes them
+ asynchronously. Deprecated.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_FB_DMA</term>
+ <listitem><para>
+ Driver supports DMA to/from the framebuffer, mapping of frambuffer
+ DMA buffers to userspace will be supported. Deprecated.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_IRQ_VBL2</term>
+ <listitem><para>Unused. Deprecated.</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_GEM</term>
+ <listitem><para>
+ Driver use the GEM memory manager.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_MODESET</term>
+ <listitem><para>
+ Driver supports mode setting interfaces (KMS).
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_PRIME</term>
+ <listitem><para>
+ Driver implements DRM PRIME buffer sharing.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </sect3>
+ <sect3>
+ <title>Major, Minor and Patchlevel</title>
+ <synopsis>int major;
+int minor;
+int patchlevel;</synopsis>
+ <para>
+ The DRM core identifies driver versions by a major, minor and patch
+ level triplet. The information is printed to the kernel log at
+ initialization time and passed to userspace through the
+ DRM_IOCTL_VERSION ioctl.
+ </para>
+ <para>
+ The major and minor numbers are also used to verify the requested driver
+ API version passed to DRM_IOCTL_SET_VERSION. When the driver API changes
+ between minor versions, applications can call DRM_IOCTL_SET_VERSION to
+ select a specific version of the API. If the requested major isn't equal
+ to the driver major, or the requested minor is larger than the driver
+ minor, the DRM_IOCTL_SET_VERSION call will return an error. Otherwise
+ the driver's set_version() method will be called with the requested
+ version.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Name, Description and Date</title>
+ <synopsis>char *name;
+char *desc;
+char *date;</synopsis>
+ <para>
+ The driver name is printed to the kernel log at initialization time,
+ used for IRQ registration and passed to userspace through
+ DRM_IOCTL_VERSION.
+ </para>
+ <para>
+ The driver description is a purely informative string passed to
+ userspace through the DRM_IOCTL_VERSION ioctl and otherwise unused by
+ the kernel.
+ </para>
+ <para>
+ The driver date, formatted as YYYYMMDD, is meant to identify the date of
+ the latest modification to the driver. However, as most drivers fail to
+ update it, its value is mostly useless. The DRM core prints it to the
+ kernel log at initialization time and passes it to userspace through the
+ DRM_IOCTL_VERSION ioctl.
+ </para>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Driver Load</title>
+ <para>
+ The <methodname>load</methodname> method is the driver and device
+ initialization entry point. The method is responsible for allocating and
+ initializing driver private data, specifying supported performance
+ counters, performing resource allocation and mapping (e.g. acquiring
+ clocks, mapping registers or allocating command buffers), initializing
+ the memory manager (<xref linkend="drm-memory-management"/>), installing
+ the IRQ handler (<xref linkend="drm-irq-registration"/>), setting up
+ vertical blanking handling (<xref linkend="drm-vertical-blank"/>), mode
+ setting (<xref linkend="drm-mode-setting"/>) and initial output
+ configuration (<xref linkend="drm-kms-init"/>).
+ </para>
+ <note><para>
+ If compatibility is a concern (e.g. with drivers converted over from
+ User Mode Setting to Kernel Mode Setting), care must be taken to prevent
+ device initialization and control that is incompatible with currently
+ active userspace drivers. For instance, if user level mode setting
+ drivers are in use, it would be problematic to perform output discovery
+ &amp; configuration at load time. Likewise, if user-level drivers
+ unaware of memory management are in use, memory management and command
+ buffer setup may need to be omitted. These requirements are
+ driver-specific, and care needs to be taken to keep both old and new
+ applications and libraries working.
+ </para></note>
+ <synopsis>int (*load) (struct drm_device *, unsigned long flags);</synopsis>
+ <para>
+ The method takes two arguments, a pointer to the newly created
+ <structname>drm_device</structname> and flags. The flags are used to
+ pass the <structfield>driver_data</structfield> field of the device id
+ corresponding to the device passed to <function>drm_*_init()</function>.
+ Only PCI devices currently use this, USB and platform DRM drivers have
+ their <methodname>load</methodname> method called with flags to 0.
+ </para>
+ <sect3>
+ <title>Driver Private &amp; Performance Counters</title>
+ <para>
+ The driver private hangs off the main
+ <structname>drm_device</structname> structure and can be used for
+ tracking various device-specific bits of information, like register
+ offsets, command buffer status, register state for suspend/resume, etc.
+ At load time, a driver may simply allocate one and set
+ <structname>drm_device</structname>.<structfield>dev_priv</structfield>
+ appropriately; it should be freed and
+ <structname>drm_device</structname>.<structfield>dev_priv</structfield>
+ set to NULL when the driver is unloaded.
+ </para>
+ <para>
+ DRM supports several counters which were used for rough performance
+ characterization. This stat counter system is deprecated and should not
+ be used. If performance monitoring is desired, the developer should
+ investigate and potentially enhance the kernel perf and tracing
+ infrastructure to export GPU related performance information for
+ consumption by performance monitoring tools and applications.
+ </para>
+ </sect3>
+ <sect3 id="drm-irq-registration">
+ <title>IRQ Registration</title>
+ <para>
+ The DRM core tries to facilitate IRQ handler registration and
+ unregistration by providing <function>drm_irq_install</function> and
+ <function>drm_irq_uninstall</function> functions. Those functions only
+ support a single interrupt per device.
+ </para>
+ <!--!Fdrivers/char/drm/drm_irq.c drm_irq_install-->
+ <para>
+ Both functions get the device IRQ by calling
+ <function>drm_dev_to_irq</function>. This inline function will call a
+ bus-specific operation to retrieve the IRQ number. For platform devices,
+ <function>platform_get_irq</function>(..., 0) is used to retrieve the
+ IRQ number.
+ </para>
+ <para>
+ <function>drm_irq_install</function> starts by calling the
+ <methodname>irq_preinstall</methodname> driver operation. The operation
+ is optional and must make sure that the interrupt will not get fired by
+ clearing all pending interrupt flags or disabling the interrupt.
+ </para>
+ <para>
+ The IRQ will then be requested by a call to
+ <function>request_irq</function>. If the DRIVER_IRQ_SHARED driver
+ feature flag is set, a shared (IRQF_SHARED) IRQ handler will be
+ requested.
+ </para>
+ <para>
+ The IRQ handler function must be provided as the mandatory irq_handler
+ driver operation. It will get passed directly to
+ <function>request_irq</function> and thus has the same prototype as all
+ IRQ handlers. It will get called with a pointer to the DRM device as the
+ second argument.
+ </para>
+ <para>
+ Finally the function calls the optional
+ <methodname>irq_postinstall</methodname> driver operation. The operation
+ usually enables interrupts (excluding the vblank interrupt, which is
+ enabled separately), but drivers may choose to enable/disable interrupts
+ at a different time.
+ </para>
+ <para>
+ <function>drm_irq_uninstall</function> is similarly used to uninstall an
+ IRQ handler. It starts by waking up all processes waiting on a vblank
+ interrupt to make sure they don't hang, and then calls the optional
+ <methodname>irq_uninstall</methodname> driver operation. The operation
+ must disable all hardware interrupts. Finally the function frees the IRQ
+ by calling <function>free_irq</function>.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Memory Manager Initialization</title>
+ <para>
+ Every DRM driver requires a memory manager which must be initialized at
+ load time. DRM currently contains two memory managers, the Translation
+ Table Manager (TTM) and the Graphics Execution Manager (GEM).
+ This document describes the use of the GEM memory manager only. See
+ <xref linkend="drm-memory-management"/> for details.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Miscellaneous Device Configuration</title>
+ <para>
+ Another task that may be necessary for PCI devices during configuration
+ is mapping the video BIOS. On many devices, the VBIOS describes device
+ configuration, LCD panel timings (if any), and contains flags indicating
+ device state. Mapping the BIOS can be done using the pci_map_rom() call,
+ a convenience function that takes care of mapping the actual ROM,
+ whether it has been shadowed into memory (typically at address 0xc0000)
+ or exists on the PCI device in the ROM BAR. Note that after the ROM has
+ been mapped and any necessary information has been extracted, it should
+ be unmapped; on many devices, the ROM address decoder is shared with
+ other BARs, so leaving it mapped could cause undesired behaviour like
+ hangs or memory corruption.
+ <!--!Fdrivers/pci/rom.c pci_map_rom-->
+ </para>
+ </sect3>
+ </sect2>
+ </sect1>
+
+ <!-- Internals: memory management -->
+
+ <sect1 id="drm-memory-management">
+ <title>Memory management</title>
+ <para>
+ Modern Linux systems require large amount of graphics memory to store
+ frame buffers, textures, vertices and other graphics-related data. Given
+ the very dynamic nature of many of that data, managing graphics memory
+ efficiently is thus crucial for the graphics stack and plays a central
+ role in the DRM infrastructure.
+ </para>
+ <para>
+ The DRM core includes two memory managers, namely Translation Table Maps
+ (TTM) and Graphics Execution Manager (GEM). TTM was the first DRM memory
+ manager to be developed and tried to be a one-size-fits-them all
+ solution. It provides a single userspace API to accomodate the need of
+ all hardware, supporting both Unified Memory Architecture (UMA) devices
+ and devices with dedicated video RAM (i.e. most discrete video cards).
+ This resulted in a large, complex piece of code that turned out to be
+ hard to use for driver development.
+ </para>
+ <para>
+ GEM started as an Intel-sponsored project in reaction to TTM's
+ complexity. Its design philosophy is completely different: instead of
+ providing a solution to every graphics memory-related problems, GEM
+ identified common code between drivers and created a support library to
+ share it. GEM has simpler initialization and execution requirements than
+ TTM, but has no video RAM management capabitilies and is thus limited to
+ UMA devices.
+ </para>
+ <sect2>
+ <title>The Translation Table Manager (TTM)</title>
+ <para>
+ TTM design background and information belongs here.
+ </para>
+ <sect3>
+ <title>TTM initialization</title>
+ <warning><para>This section is outdated.</para></warning>
+ <para>
+ Drivers wishing to support TTM must fill out a drm_bo_driver
+ structure. The structure contains several fields with function
+ pointers for initializing the TTM, allocating and freeing memory,
+ waiting for command completion and fence synchronization, and memory
+ migration. See the radeon_ttm.c file for an example of usage.
+ </para>
+ <para>
+ The ttm_global_reference structure is made up of several fields:
+ </para>
+ <programlisting>
+ struct ttm_global_reference {
+ enum ttm_global_types global_type;
+ size_t size;
+ void *object;
+ int (*init) (struct ttm_global_reference *);
+ void (*release) (struct ttm_global_reference *);
+ };
+ </programlisting>
+ <para>
+ There should be one global reference structure for your memory
+ manager as a whole, and there will be others for each object
+ created by the memory manager at runtime. Your global TTM should
+ have a type of TTM_GLOBAL_TTM_MEM. The size field for the global
+ object should be sizeof(struct ttm_mem_global), and the init and
+ release hooks should point at your driver-specific init and
+ release routines, which probably eventually call
+ ttm_mem_global_init and ttm_mem_global_release, respectively.
+ </para>
+ <para>
+ Once your global TTM accounting structure is set up and initialized
+ by calling ttm_global_item_ref() on it,
+ you need to create a buffer object TTM to
+ provide a pool for buffer object allocation by clients and the
+ kernel itself. The type of this object should be TTM_GLOBAL_TTM_BO,
+ and its size should be sizeof(struct ttm_bo_global). Again,
+ driver-specific init and release functions may be provided,
+ likely eventually calling ttm_bo_global_init() and
+ ttm_bo_global_release(), respectively. Also, like the previous
+ object, ttm_global_item_ref() is used to create an initial reference
+ count for the TTM, which will call your initialization function.
+ </para>
+ </sect3>
+ </sect2>
+ <sect2 id="drm-gem">
+ <title>The Graphics Execution Manager (GEM)</title>
+ <para>
+ The GEM design approach has resulted in a memory manager that doesn't
+ provide full coverage of all (or even all common) use cases in its
+ userspace or kernel API. GEM exposes a set of standard memory-related
+ operations to userspace and a set of helper functions to drivers, and let
+ drivers implement hardware-specific operations with their own private API.
+ </para>
+ <para>
+ The GEM userspace API is described in the
+ <ulink url="http://lwn.net/Articles/283798/"><citetitle>GEM - the Graphics
+ Execution Manager</citetitle></ulink> article on LWN. While slightly
+ outdated, the document provides a good overview of the GEM API principles.
+ Buffer allocation and read and write operations, described as part of the
+ common GEM API, are currently implemented using driver-specific ioctls.
+ </para>
+ <para>
+ GEM is data-agnostic. It manages abstract buffer objects without knowing
+ what individual buffers contain. APIs that require knowledge of buffer
+ contents or purpose, such as buffer allocation or synchronization
+ primitives, are thus outside of the scope of GEM and must be implemented
+ using driver-specific ioctls.
+ </para>
+ <para>
+ On a fundamental level, GEM involves several operations:
+ <itemizedlist>
+ <listitem>Memory allocation and freeing</listitem>
+ <listitem>Command execution</listitem>
+ <listitem>Aperture management at command execution time</listitem>
+ </itemizedlist>
+ Buffer object allocation is relatively straightforward and largely
+ provided by Linux's shmem layer, which provides memory to back each
+ object.
+ </para>
+ <para>
+ Device-specific operations, such as command execution, pinning, buffer
+ read &amp; write, mapping, and domain ownership transfers are left to
+ driver-specific ioctls.
+ </para>
+ <sect3>
+ <title>GEM Initialization</title>
+ <para>
+ Drivers that use GEM must set the DRIVER_GEM bit in the struct
+ <structname>drm_driver</structname>
+ <structfield>driver_features</structfield> field. The DRM core will
+ then automatically initialize the GEM core before calling the
+ <methodname>load</methodname> operation. Behind the scene, this will
+ create a DRM Memory Manager object which provides an address space
+ pool for object allocation.
+ </para>
+ <para>
+ In a KMS configuration, drivers need to allocate and initialize a
+ command ring buffer following core GEM initialization if required by
+ the hardware. UMA devices usually have what is called a "stolen"
+ memory region, which provides space for the initial framebuffer and
+ large, contiguous memory regions required by the device. This space is
+ typically not managed by GEM, and must be initialized separately into
+ its own DRM MM object.
+ </para>
+ </sect3>
+ <sect3>
+ <title>GEM Objects Creation</title>
+ <para>
+ GEM splits creation of GEM objects and allocation of the memory that
+ backs them in two distinct operations.
+ </para>
+ <para>
+ GEM objects are represented by an instance of struct
+ <structname>drm_gem_object</structname>. Drivers usually need to extend
+ GEM objects with private information and thus create a driver-specific
+ GEM object structure type that embeds an instance of struct
+ <structname>drm_gem_object</structname>.
+ </para>
+ <para>
+ To create a GEM object, a driver allocates memory for an instance of its
+ specific GEM object type and initializes the embedded struct
+ <structname>drm_gem_object</structname> with a call to
+ <function>drm_gem_object_init</function>. The function takes a pointer to
+ the DRM device, a pointer to the GEM object and the buffer object size
+ in bytes.
+ </para>
+ <para>
+ GEM uses shmem to allocate anonymous pageable memory.
+ <function>drm_gem_object_init</function> will create an shmfs file of
+ the requested size and store it into the struct
+ <structname>drm_gem_object</structname> <structfield>filp</structfield>
+ field. The memory is used as either main storage for the object when the
+ graphics hardware uses system memory directly or as a backing store
+ otherwise.
+ </para>
+ <para>
+ Drivers are responsible for the actual physical pages allocation by
+ calling <function>shmem_read_mapping_page_gfp</function> for each page.
+ Note that they can decide to allocate pages when initializing the GEM
+ object, or to delay allocation until the memory is needed (for instance
+ when a page fault occurs as a result of a userspace memory access or
+ when the driver needs to start a DMA transfer involving the memory).
+ </para>
+ <para>
+ Anonymous pageable memory allocation is not always desired, for instance
+ when the hardware requires physically contiguous system memory as is
+ often the case in embedded devices. Drivers can create GEM objects with
+ no shmfs backing (called private GEM objects) by initializing them with
+ a call to <function>drm_gem_private_object_init</function> instead of
+ <function>drm_gem_object_init</function>. Storage for private GEM
+ objects must be managed by drivers.
+ </para>
+ <para>
+ Drivers that do not need to extend GEM objects with private information
+ can call the <function>drm_gem_object_alloc</function> function to
+ allocate and initialize a struct <structname>drm_gem_object</structname>
+ instance. The GEM core will call the optional driver
+ <methodname>gem_init_object</methodname> operation after initializing
+ the GEM object with <function>drm_gem_object_init</function>.
+ <synopsis>int (*gem_init_object) (struct drm_gem_object *obj);</synopsis>
+ </para>
+ <para>
+ No alloc-and-init function exists for private GEM objects.
+ </para>
+ </sect3>
+ <sect3>
+ <title>GEM Objects Lifetime</title>
+ <para>
+ All GEM objects are reference-counted by the GEM core. References can be
+ acquired and release by <function>calling drm_gem_object_reference</function>
+ and <function>drm_gem_object_unreference</function> respectively. The
+ caller must hold the <structname>drm_device</structname>
+ <structfield>struct_mutex</structfield> lock. As a convenience, GEM
+ provides the <function>drm_gem_object_reference_unlocked</function> and
+ <function>drm_gem_object_unreference_unlocked</function> functions that
+ can be called without holding the lock.
+ </para>
+ <para>
+ When the last reference to a GEM object is released the GEM core calls
+ the <structname>drm_driver</structname>
+ <methodname>gem_free_object</methodname> operation. That operation is
+ mandatory for GEM-enabled drivers and must free the GEM object and all
+ associated resources.
+ </para>
+ <para>
+ <synopsis>void (*gem_free_object) (struct drm_gem_object *obj);</synopsis>
+ Drivers are responsible for freeing all GEM object resources, including
+ the resources created by the GEM core. If an mmap offset has been
+ created for the object (in which case
+ <structname>drm_gem_object</structname>::<structfield>map_list</structfield>::<structfield>map</structfield>
+ is not NULL) it must be freed by a call to
+ <function>drm_gem_free_mmap_offset</function>. The shmfs backing store
+ must be released by calling <function>drm_gem_object_release</function>
+ (that function can safely be called if no shmfs backing store has been
+ created).
+ </para>
+ </sect3>
+ <sect3>
+ <title>GEM Objects Naming</title>
+ <para>
+ Communication between userspace and the kernel refers to GEM objects
+ using local handles, global names or, more recently, file descriptors.
+ All of those are 32-bit integer values; the usual Linux kernel limits
+ apply to the file descriptors.
+ </para>
+ <para>
+ GEM handles are local to a DRM file. Applications get a handle to a GEM
+ object through a driver-specific ioctl, and can use that handle to refer
+ to the GEM object in other standard or driver-specific ioctls. Closing a
+ DRM file handle frees all its GEM handles and dereferences the
+ associated GEM objects.
+ </para>
+ <para>
+ To create a handle for a GEM object drivers call
+ <function>drm_gem_handle_create</function>. The function takes a pointer
+ to the DRM file and the GEM object and returns a locally unique handle.
+ When the handle is no longer needed drivers delete it with a call to
+ <function>drm_gem_handle_delete</function>. Finally the GEM object
+ associated with a handle can be retrieved by a call to
+ <function>drm_gem_object_lookup</function>.
+ </para>
+ <para>
+ Handles don't take ownership of GEM objects, they only take a reference
+ to the object that will be dropped when the handle is destroyed. To
+ avoid leaking GEM objects, drivers must make sure they drop the
+ reference(s) they own (such as the initial reference taken at object
+ creation time) as appropriate, without any special consideration for the
+ handle. For example, in the particular case of combined GEM object and
+ handle creation in the implementation of the
+ <methodname>dumb_create</methodname> operation, drivers must drop the
+ initial reference to the GEM object before returning the handle.
+ </para>
+ <para>
+ GEM names are similar in purpose to handles but are not local to DRM
+ files. They can be passed between processes to reference a GEM object
+ globally. Names can't be used directly to refer to objects in the DRM
+ API, applications must convert handles to names and names to handles
+ using the DRM_IOCTL_GEM_FLINK and DRM_IOCTL_GEM_OPEN ioctls
+ respectively. The conversion is handled by the DRM core without any
+ driver-specific support.
+ </para>
+ <para>
+ Similar to global names, GEM file descriptors are also used to share GEM
+ objects across processes. They offer additional security: as file
+ descriptors must be explictly sent over UNIX domain sockets to be shared
+ between applications, they can't be guessed like the globally unique GEM
+ names.
+ </para>
+ <para>
+ Drivers that support GEM file descriptors, also known as the DRM PRIME
+ API, must set the DRIVER_PRIME bit in the struct
+ <structname>drm_driver</structname>
+ <structfield>driver_features</structfield> field, and implement the
+ <methodname>prime_handle_to_fd</methodname> and
+ <methodname>prime_fd_to_handle</methodname> operations.
+ </para>
+ <para>
+ <synopsis>int (*prime_handle_to_fd)(struct drm_device *dev,
+ struct drm_file *file_priv, uint32_t handle,
+ uint32_t flags, int *prime_fd);
+ int (*prime_fd_to_handle)(struct drm_device *dev,
+ struct drm_file *file_priv, int prime_fd,
+ uint32_t *handle);</synopsis>
+ Those two operations convert a handle to a PRIME file descriptor and
+ vice versa. Drivers must use the kernel dma-buf buffer sharing framework
+ to manage the PRIME file descriptors.
+ </para>
+ <para>
+ While non-GEM drivers must implement the operations themselves, GEM
+ drivers must use the <function>drm_gem_prime_handle_to_fd</function>
+ and <function>drm_gem_prime_fd_to_handle</function> helper functions.
+ Those helpers rely on the driver
+ <methodname>gem_prime_export</methodname> and
+ <methodname>gem_prime_import</methodname> operations to create a dma-buf
+ instance from a GEM object (dma-buf exporter role) and to create a GEM
+ object from a dma-buf instance (dma-buf importer role).
+ </para>
+ <para>
+ <synopsis>struct dma_buf * (*gem_prime_export)(struct drm_device *dev,
+ struct drm_gem_object *obj,
+ int flags);
+ struct drm_gem_object * (*gem_prime_import)(struct drm_device *dev,
+ struct dma_buf *dma_buf);</synopsis>
+ These two operations are mandatory for GEM drivers that support DRM
+ PRIME.
+ </para>
+ <sect4>
+ <title>DRM PRIME Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_prime.c PRIME Helpers
+ </sect4>
+ </sect3>
+ <sect3 id="drm-gem-objects-mapping">
+ <title>GEM Objects Mapping</title>
+ <para>
+ Because mapping operations are fairly heavyweight GEM favours
+ read/write-like access to buffers, implemented through driver-specific
+ ioctls, over mapping buffers to userspace. However, when random access
+ to the buffer is needed (to perform software rendering for instance),
+ direct access to the object can be more efficient.
+ </para>
+ <para>
+ The mmap system call can't be used directly to map GEM objects, as they
+ don't have their own file handle. Two alternative methods currently
+ co-exist to map GEM objects to userspace. The first method uses a
+ driver-specific ioctl to perform the mapping operation, calling
+ <function>do_mmap</function> under the hood. This is often considered
+ dubious, seems to be discouraged for new GEM-enabled drivers, and will
+ thus not be described here.
+ </para>
+ <para>
+ The second method uses the mmap system call on the DRM file handle.
+ <synopsis>void *mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset);</synopsis>
+ DRM identifies the GEM object to be mapped by a fake offset passed
+ through the mmap offset argument. Prior to being mapped, a GEM object
+ must thus be associated with a fake offset. To do so, drivers must call
+ <function>drm_gem_create_mmap_offset</function> on the object. The
+ function allocates a fake offset range from a pool and stores the
+ offset divided by PAGE_SIZE in
+ <literal>obj-&gt;map_list.hash.key</literal>. Care must be taken not to
+ call <function>drm_gem_create_mmap_offset</function> if a fake offset
+ has already been allocated for the object. This can be tested by
+ <literal>obj-&gt;map_list.map</literal> being non-NULL.
+ </para>
+ <para>
+ Once allocated, the fake offset value
+ (<literal>obj-&gt;map_list.hash.key &lt;&lt; PAGE_SHIFT</literal>)
+ must be passed to the application in a driver-specific way and can then
+ be used as the mmap offset argument.
+ </para>
+ <para>
+ The GEM core provides a helper method <function>drm_gem_mmap</function>
+ to handle object mapping. The method can be set directly as the mmap
+ file operation handler. It will look up the GEM object based on the
+ offset value and set the VMA operations to the
+ <structname>drm_driver</structname> <structfield>gem_vm_ops</structfield>
+ field. Note that <function>drm_gem_mmap</function> doesn't map memory to
+ userspace, but relies on the driver-provided fault handler to map pages
+ individually.
+ </para>
+ <para>
+ To use <function>drm_gem_mmap</function>, drivers must fill the struct
+ <structname>drm_driver</structname> <structfield>gem_vm_ops</structfield>
+ field with a pointer to VM operations.
+ </para>
+ <para>
+ <synopsis>struct vm_operations_struct *gem_vm_ops
+
+ struct vm_operations_struct {
+ void (*open)(struct vm_area_struct * area);
+ void (*close)(struct vm_area_struct * area);
+ int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+ };</synopsis>
+ </para>
+ <para>
+ The <methodname>open</methodname> and <methodname>close</methodname>
+ operations must update the GEM object reference count. Drivers can use
+ the <function>drm_gem_vm_open</function> and
+ <function>drm_gem_vm_close</function> helper functions directly as open
+ and close handlers.
+ </para>
+ <para>
+ The fault operation handler is responsible for mapping individual pages
+ to userspace when a page fault occurs. Depending on the memory
+ allocation scheme, drivers can allocate pages at fault time, or can
+ decide to allocate memory for the GEM object at the time the object is
+ created.
+ </para>
+ <para>
+ Drivers that want to map the GEM object upfront instead of handling page
+ faults can implement their own mmap file operation handler.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Dumb GEM Objects</title>
+ <para>
+ The GEM API doesn't standardize GEM objects creation and leaves it to
+ driver-specific ioctls. While not an issue for full-fledged graphics
+ stacks that include device-specific userspace components (in libdrm for
+ instance), this limit makes DRM-based early boot graphics unnecessarily
+ complex.
+ </para>
+ <para>
+ Dumb GEM objects partly alleviate the problem by providing a standard
+ API to create dumb buffers suitable for scanout, which can then be used
+ to create KMS frame buffers.
+ </para>
+ <para>
+ To support dumb GEM objects drivers must implement the
+ <methodname>dumb_create</methodname>,
+ <methodname>dumb_destroy</methodname> and
+ <methodname>dumb_map_offset</methodname> operations.
+ </para>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int (*dumb_create)(struct drm_file *file_priv, struct drm_device *dev,
+ struct drm_mode_create_dumb *args);</synopsis>
+ <para>
+ The <methodname>dumb_create</methodname> operation creates a GEM
+ object suitable for scanout based on the width, height and depth
+ from the struct <structname>drm_mode_create_dumb</structname>
+ argument. It fills the argument's <structfield>handle</structfield>,
+ <structfield>pitch</structfield> and <structfield>size</structfield>
+ fields with a handle for the newly created GEM object and its line
+ pitch and size in bytes.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*dumb_destroy)(struct drm_file *file_priv, struct drm_device *dev,
+ uint32_t handle);</synopsis>
+ <para>
+ The <methodname>dumb_destroy</methodname> operation destroys a dumb
+ GEM object created by <methodname>dumb_create</methodname>.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*dumb_map_offset)(struct drm_file *file_priv, struct drm_device *dev,
+ uint32_t handle, uint64_t *offset);</synopsis>
+ <para>
+ The <methodname>dumb_map_offset</methodname> operation associates an
+ mmap fake offset with the GEM object given by the handle and returns
+ it. Drivers must use the
+ <function>drm_gem_create_mmap_offset</function> function to
+ associate the fake offset as described in
+ <xref linkend="drm-gem-objects-mapping"/>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect3>
+ <sect3>
+ <title>Memory Coherency</title>
+ <para>
+ When mapped to the device or used in a command buffer, backing pages
+ for an object are flushed to memory and marked write combined so as to
+ be coherent with the GPU. Likewise, if the CPU accesses an object
+ after the GPU has finished rendering to the object, then the object
+ must be made coherent with the CPU's view of memory, usually involving
+ GPU cache flushing of various kinds. This core CPU&lt;-&gt;GPU
+ coherency management is provided by a device-specific ioctl, which
+ evaluates an object's current domain and performs any necessary
+ flushing or synchronization to put the object into the desired
+ coherency domain (note that the object may be busy, i.e. an active
+ render target; in that case, setting the domain blocks the client and
+ waits for rendering to complete before performing any necessary
+ flushing operations).
+ </para>
+ </sect3>
+ <sect3>
+ <title>Command Execution</title>
+ <para>
+ Perhaps the most important GEM function for GPU devices is providing a
+ command execution interface to clients. Client programs construct
+ command buffers containing references to previously allocated memory
+ objects, and then submit them to GEM. At that point, GEM takes care to
+ bind all the objects into the GTT, execute the buffer, and provide
+ necessary synchronization between clients accessing the same buffers.
+ This often involves evicting some objects from the GTT and re-binding
+ others (a fairly expensive operation), and providing relocation
+ support which hides fixed GTT offsets from clients. Clients must take
+ care not to submit command buffers that reference more objects than
+ can fit in the GTT; otherwise, GEM will reject them and no rendering
+ will occur. Similarly, if several objects in the buffer require fence
+ registers to be allocated for correct rendering (e.g. 2D blits on
+ pre-965 chips), care must be taken not to require more fence registers
+ than are available to the client. Such resource management should be
+ abstracted from the client in libdrm.
+ </para>
+ </sect3>
+ </sect2>
+ </sect1>
+
+ <!-- Internals: mode setting -->
+
+ <sect1 id="drm-mode-setting">
+ <title>Mode Setting</title>
+ <para>
+ Drivers must initialize the mode setting core by calling
+ <function>drm_mode_config_init</function> on the DRM device. The function
+ initializes the <structname>drm_device</structname>
+ <structfield>mode_config</structfield> field and never fails. Once done,
+ mode configuration must be setup by initializing the following fields.
+ </para>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int min_width, min_height;
+int max_width, max_height;</synopsis>
+ <para>
+ Minimum and maximum width and height of the frame buffers in pixel
+ units.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>struct drm_mode_config_funcs *funcs;</synopsis>
+ <para>Mode setting functions.</para>
+ </listitem>
+ </itemizedlist>
+ <sect2>
+ <title>Frame Buffer Creation</title>
+ <synopsis>struct drm_framebuffer *(*fb_create)(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_mode_fb_cmd2 *mode_cmd);</synopsis>
+ <para>
+ Frame buffers are abstract memory objects that provide a source of
+ pixels to scanout to a CRTC. Applications explicitly request the
+ creation of frame buffers through the DRM_IOCTL_MODE_ADDFB(2) ioctls and
+ receive an opaque handle that can be passed to the KMS CRTC control,
+ plane configuration and page flip functions.
+ </para>
+ <para>
+ Frame buffers rely on the underneath memory manager for low-level memory
+ operations. When creating a frame buffer applications pass a memory
+ handle (or a list of memory handles for multi-planar formats) through
+ the <parameter>drm_mode_fb_cmd2</parameter> argument. This document
+ assumes that the driver uses GEM, those handles thus reference GEM
+ objects.
+ </para>
+ <para>
+ Drivers must first validate the requested frame buffer parameters passed
+ through the mode_cmd argument. In particular this is where invalid
+ sizes, pixel formats or pitches can be caught.
+ </para>
+ <para>
+ If the parameters are deemed valid, drivers then create, initialize and
+ return an instance of struct <structname>drm_framebuffer</structname>.
+ If desired the instance can be embedded in a larger driver-specific
+ structure. Drivers must fill its <structfield>width</structfield>,
+ <structfield>height</structfield>, <structfield>pitches</structfield>,
+ <structfield>offsets</structfield>, <structfield>depth</structfield>,
+ <structfield>bits_per_pixel</structfield> and
+ <structfield>pixel_format</structfield> fields from the values passed
+ through the <parameter>drm_mode_fb_cmd2</parameter> argument. They
+ should call the <function>drm_helper_mode_fill_fb_struct</function>
+ helper function to do so.
+ </para>
+
+ <para>
+ The initailization of the new framebuffer instance is finalized with a
+ call to <function>drm_framebuffer_init</function> which takes a pointer
+ to DRM frame buffer operations (struct
+ <structname>drm_framebuffer_funcs</structname>). Note that this function
+ publishes the framebuffer and so from this point on it can be accessed
+ concurrently from other threads. Hence it must be the last step in the
+ driver's framebuffer initialization sequence. Frame buffer operations
+ are
+ <itemizedlist>
+ <listitem>
+ <synopsis>int (*create_handle)(struct drm_framebuffer *fb,
+ struct drm_file *file_priv, unsigned int *handle);</synopsis>
+ <para>
+ Create a handle to the frame buffer underlying memory object. If
+ the frame buffer uses a multi-plane format, the handle will
+ reference the memory object associated with the first plane.
+ </para>
+ <para>
+ Drivers call <function>drm_gem_handle_create</function> to create
+ the handle.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_framebuffer *framebuffer);</synopsis>
+ <para>
+ Destroy the frame buffer object and frees all associated
+ resources. Drivers must call
+ <function>drm_framebuffer_cleanup</function> to free resources
+ allocated by the DRM core for the frame buffer object, and must
+ make sure to unreference all memory objects associated with the
+ frame buffer. Handles created by the
+ <methodname>create_handle</methodname> operation are released by
+ the DRM core.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*dirty)(struct drm_framebuffer *framebuffer,
+ struct drm_file *file_priv, unsigned flags, unsigned color,
+ struct drm_clip_rect *clips, unsigned num_clips);</synopsis>
+ <para>
+ This optional operation notifies the driver that a region of the
+ frame buffer has changed in response to a DRM_IOCTL_MODE_DIRTYFB
+ ioctl call.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The lifetime of a drm framebuffer is controlled with a reference count,
+ drivers can grab additional references with
+ <function>drm_framebuffer_reference</function> </para> and drop them
+ again with <function>drm_framebuffer_unreference</function>. For
+ driver-private framebuffers for which the last reference is never
+ dropped (e.g. for the fbdev framebuffer when the struct
+ <structname>drm_framebuffer</structname> is embedded into the fbdev
+ helper struct) drivers can manually clean up a framebuffer at module
+ unload time with
+ <function>drm_framebuffer_unregister_private</function>.
+ </sect2>
+ <sect2>
+ <title>Output Polling</title>
+ <synopsis>void (*output_poll_changed)(struct drm_device *dev);</synopsis>
+ <para>
+ This operation notifies the driver that the status of one or more
+ connectors has changed. Drivers that use the fb helper can just call the
+ <function>drm_fb_helper_hotplug_event</function> function to handle this
+ operation.
+ </para>
+ </sect2>
+ <sect2>
+ <title>Locking</title>
+ <para>
+ Beside some lookup structures with their own locking (which is hidden
+ behind the interface functions) most of the modeset state is protected
+ by the <code>dev-&lt;mode_config.lock</code> mutex and additionally
+ per-crtc locks to allow cursor updates, pageflips and similar operations
+ to occur concurrently with background tasks like output detection.
+ Operations which cross domains like a full modeset always grab all
+ locks. Drivers there need to protect resources shared between crtcs with
+ additional locking. They also need to be careful to always grab the
+ relevant crtc locks if a modset functions touches crtc state, e.g. for
+ load detection (which does only grab the <code>mode_config.lock</code>
+ to allow concurrent screen updates on live crtcs).
+ </para>
+ </sect2>
+ </sect1>
+
+ <!-- Internals: kms initialization and cleanup -->
+
+ <sect1 id="drm-kms-init">
+ <title>KMS Initialization and Cleanup</title>
+ <para>
+ A KMS device is abstracted and exposed as a set of planes, CRTCs, encoders
+ and connectors. KMS drivers must thus create and initialize all those
+ objects at load time after initializing mode setting.
+ </para>
+ <sect2>
+ <title>CRTCs (struct <structname>drm_crtc</structname>)</title>
+ <para>
+ A CRTC is an abstraction representing a part of the chip that contains a
+ pointer to a scanout buffer. Therefore, the number of CRTCs available
+ determines how many independent scanout buffers can be active at any
+ given time. The CRTC structure contains several fields to support this:
+ a pointer to some video memory (abstracted as a frame buffer object), a
+ display mode, and an (x, y) offset into the video memory to support
+ panning or configurations where one piece of video memory spans multiple
+ CRTCs.
+ </para>
+ <sect3>
+ <title>CRTC Initialization</title>
+ <para>
+ A KMS device must create and register at least one struct
+ <structname>drm_crtc</structname> instance. The instance is allocated
+ and zeroed by the driver, possibly as part of a larger structure, and
+ registered with a call to <function>drm_crtc_init</function> with a
+ pointer to CRTC functions.
+ </para>
+ </sect3>
+ <sect3>
+ <title>CRTC Operations</title>
+ <sect4>
+ <title>Set Configuration</title>
+ <synopsis>int (*set_config)(struct drm_mode_set *set);</synopsis>
+ <para>
+ Apply a new CRTC configuration to the device. The configuration
+ specifies a CRTC, a frame buffer to scan out from, a (x,y) position in
+ the frame buffer, a display mode and an array of connectors to drive
+ with the CRTC if possible.
+ </para>
+ <para>
+ If the frame buffer specified in the configuration is NULL, the driver
+ must detach all encoders connected to the CRTC and all connectors
+ attached to those encoders and disable them.
+ </para>
+ <para>
+ This operation is called with the mode config lock held.
+ </para>
+ <note><para>
+ FIXME: How should set_config interact with DPMS? If the CRTC is
+ suspended, should it be resumed?
+ </para></note>
+ </sect4>
+ <sect4>
+ <title>Page Flipping</title>
+ <synopsis>int (*page_flip)(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event);</synopsis>
+ <para>
+ Schedule a page flip to the given frame buffer for the CRTC. This
+ operation is called with the mode config mutex held.
+ </para>
+ <para>
+ Page flipping is a synchronization mechanism that replaces the frame
+ buffer being scanned out by the CRTC with a new frame buffer during
+ vertical blanking, avoiding tearing. When an application requests a page
+ flip the DRM core verifies that the new frame buffer is large enough to
+ be scanned out by the CRTC in the currently configured mode and then
+ calls the CRTC <methodname>page_flip</methodname> operation with a
+ pointer to the new frame buffer.
+ </para>
+ <para>
+ The <methodname>page_flip</methodname> operation schedules a page flip.
+ Once any pending rendering targetting the new frame buffer has
+ completed, the CRTC will be reprogrammed to display that frame buffer
+ after the next vertical refresh. The operation must return immediately
+ without waiting for rendering or page flip to complete and must block
+ any new rendering to the frame buffer until the page flip completes.
+ </para>
+ <para>
+ If a page flip can be successfully scheduled the driver must set the
+ <code>drm_crtc-&lt;fb</code> field to the new framebuffer pointed to
+ by <code>fb</code>. This is important so that the reference counting
+ on framebuffers stays balanced.
+ </para>
+ <para>
+ If a page flip is already pending, the
+ <methodname>page_flip</methodname> operation must return
+ -<errorname>EBUSY</errorname>.
+ </para>
+ <para>
+ To synchronize page flip to vertical blanking the driver will likely
+ need to enable vertical blanking interrupts. It should call
+ <function>drm_vblank_get</function> for that purpose, and call
+ <function>drm_vblank_put</function> after the page flip completes.
+ </para>
+ <para>
+ If the application has requested to be notified when page flip completes
+ the <methodname>page_flip</methodname> operation will be called with a
+ non-NULL <parameter>event</parameter> argument pointing to a
+ <structname>drm_pending_vblank_event</structname> instance. Upon page
+ flip completion the driver must call <methodname>drm_send_vblank_event</methodname>
+ to fill in the event and send to wake up any waiting processes.
+ This can be performed with
+ <programlisting><![CDATA[
+ spin_lock_irqsave(&dev->event_lock, flags);
+ ...
+ drm_send_vblank_event(dev, pipe, event);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ ]]></programlisting>
+ </para>
+ <note><para>
+ FIXME: Could drivers that don't need to wait for rendering to complete
+ just add the event to <literal>dev-&gt;vblank_event_list</literal> and
+ let the DRM core handle everything, as for "normal" vertical blanking
+ events?
+ </para></note>
+ <para>
+ While waiting for the page flip to complete, the
+ <literal>event-&gt;base.link</literal> list head can be used freely by
+ the driver to store the pending event in a driver-specific list.
+ </para>
+ <para>
+ If the file handle is closed before the event is signaled, drivers must
+ take care to destroy the event in their
+ <methodname>preclose</methodname> operation (and, if needed, call
+ <function>drm_vblank_put</function>).
+ </para>
+ </sect4>
+ <sect4>
+ <title>Miscellaneous</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>void (*gamma_set)(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
+ uint32_t start, uint32_t size);</synopsis>
+ <para>
+ Apply a gamma table to the device. The operation is optional.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_crtc *crtc);</synopsis>
+ <para>
+ Destroy the CRTC when not needed anymore. See
+ <xref linkend="drm-kms-init"/>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect4>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Planes (struct <structname>drm_plane</structname>)</title>
+ <para>
+ A plane represents an image source that can be blended with or overlayed
+ on top of a CRTC during the scanout process. Planes are associated with
+ a frame buffer to crop a portion of the image memory (source) and
+ optionally scale it to a destination size. The result is then blended
+ with or overlayed on top of a CRTC.
+ </para>
+ <sect3>
+ <title>Plane Initialization</title>
+ <para>
+ Planes are optional. To create a plane, a KMS drivers allocates and
+ zeroes an instances of struct <structname>drm_plane</structname>
+ (possibly as part of a larger structure) and registers it with a call
+ to <function>drm_plane_init</function>. The function takes a bitmask
+ of the CRTCs that can be associated with the plane, a pointer to the
+ plane functions and a list of format supported formats.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Plane Operations</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int (*update_plane)(struct drm_plane *plane, struct drm_crtc *crtc,
+ struct drm_framebuffer *fb, int crtc_x, int crtc_y,
+ unsigned int crtc_w, unsigned int crtc_h,
+ uint32_t src_x, uint32_t src_y,
+ uint32_t src_w, uint32_t src_h);</synopsis>
+ <para>
+ Enable and configure the plane to use the given CRTC and frame buffer.
+ </para>
+ <para>
+ The source rectangle in frame buffer memory coordinates is given by
+ the <parameter>src_x</parameter>, <parameter>src_y</parameter>,
+ <parameter>src_w</parameter> and <parameter>src_h</parameter>
+ parameters (as 16.16 fixed point values). Devices that don't support
+ subpixel plane coordinates can ignore the fractional part.
+ </para>
+ <para>
+ The destination rectangle in CRTC coordinates is given by the
+ <parameter>crtc_x</parameter>, <parameter>crtc_y</parameter>,
+ <parameter>crtc_w</parameter> and <parameter>crtc_h</parameter>
+ parameters (as integer values). Devices scale the source rectangle to
+ the destination rectangle. If scaling is not supported, and the source
+ rectangle size doesn't match the destination rectangle size, the
+ driver must return a -<errorname>EINVAL</errorname> error.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*disable_plane)(struct drm_plane *plane);</synopsis>
+ <para>
+ Disable the plane. The DRM core calls this method in response to a
+ DRM_IOCTL_MODE_SETPLANE ioctl call with the frame buffer ID set to 0.
+ Disabled planes must not be processed by the CRTC.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_plane *plane);</synopsis>
+ <para>
+ Destroy the plane when not needed anymore. See
+ <xref linkend="drm-kms-init"/>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Encoders (struct <structname>drm_encoder</structname>)</title>
+ <para>
+ An encoder takes pixel data from a CRTC and converts it to a format
+ suitable for any attached connectors. On some devices, it may be
+ possible to have a CRTC send data to more than one encoder. In that
+ case, both encoders would receive data from the same scanout buffer,
+ resulting in a "cloned" display configuration across the connectors
+ attached to each encoder.
+ </para>
+ <sect3>
+ <title>Encoder Initialization</title>
+ <para>
+ As for CRTCs, a KMS driver must create, initialize and register at
+ least one struct <structname>drm_encoder</structname> instance. The
+ instance is allocated and zeroed by the driver, possibly as part of a
+ larger structure.
+ </para>
+ <para>
+ Drivers must initialize the struct <structname>drm_encoder</structname>
+ <structfield>possible_crtcs</structfield> and
+ <structfield>possible_clones</structfield> fields before registering the
+ encoder. Both fields are bitmasks of respectively the CRTCs that the
+ encoder can be connected to, and sibling encoders candidate for cloning.
+ </para>
+ <para>
+ After being initialized, the encoder must be registered with a call to
+ <function>drm_encoder_init</function>. The function takes a pointer to
+ the encoder functions and an encoder type. Supported types are
+ <itemizedlist>
+ <listitem>
+ DRM_MODE_ENCODER_DAC for VGA and analog on DVI-I/DVI-A
+ </listitem>
+ <listitem>
+ DRM_MODE_ENCODER_TMDS for DVI, HDMI and (embedded) DisplayPort
+ </listitem>
+ <listitem>
+ DRM_MODE_ENCODER_LVDS for display panels
+ </listitem>
+ <listitem>
+ DRM_MODE_ENCODER_TVDAC for TV output (Composite, S-Video, Component,
+ SCART)
+ </listitem>
+ <listitem>
+ DRM_MODE_ENCODER_VIRTUAL for virtual machine displays
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ Encoders must be attached to a CRTC to be used. DRM drivers leave
+ encoders unattached at initialization time. Applications (or the fbdev
+ compatibility layer when implemented) are responsible for attaching the
+ encoders they want to use to a CRTC.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Encoder Operations</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_encoder *encoder);</synopsis>
+ <para>
+ Called to destroy the encoder when not needed anymore. See
+ <xref linkend="drm-kms-init"/>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Connectors (struct <structname>drm_connector</structname>)</title>
+ <para>
+ A connector is the final destination for pixel data on a device, and
+ usually connects directly to an external display device like a monitor
+ or laptop panel. A connector can only be attached to one encoder at a
+ time. The connector is also the structure where information about the
+ attached display is kept, so it contains fields for display data, EDID
+ data, DPMS &amp; connection status, and information about modes
+ supported on the attached displays.
+ </para>
+ <sect3>
+ <title>Connector Initialization</title>
+ <para>
+ Finally a KMS driver must create, initialize, register and attach at
+ least one struct <structname>drm_connector</structname> instance. The
+ instance is created as other KMS objects and initialized by setting the
+ following fields.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><structfield>interlace_allowed</structfield></term>
+ <listitem><para>
+ Whether the connector can handle interlaced modes.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><structfield>doublescan_allowed</structfield></term>
+ <listitem><para>
+ Whether the connector can handle doublescan.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><structfield>display_info
+ </structfield></term>
+ <listitem><para>
+ Display information is filled from EDID information when a display
+ is detected. For non hot-pluggable displays such as flat panels in
+ embedded systems, the driver should initialize the
+ <structfield>display_info</structfield>.<structfield>width_mm</structfield>
+ and
+ <structfield>display_info</structfield>.<structfield>height_mm</structfield>
+ fields with the physical size of the display.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term id="drm-kms-connector-polled"><structfield>polled</structfield></term>
+ <listitem><para>
+ Connector polling mode, a combination of
+ <variablelist>
+ <varlistentry>
+ <term>DRM_CONNECTOR_POLL_HPD</term>
+ <listitem><para>
+ The connector generates hotplug events and doesn't need to be
+ periodically polled. The CONNECT and DISCONNECT flags must not
+ be set together with the HPD flag.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_CONNECTOR_POLL_CONNECT</term>
+ <listitem><para>
+ Periodically poll the connector for connection.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_CONNECTOR_POLL_DISCONNECT</term>
+ <listitem><para>
+ Periodically poll the connector for disconnection.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ Set to 0 for connectors that don't support connection status
+ discovery.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ <para>
+ The connector is then registered with a call to
+ <function>drm_connector_init</function> with a pointer to the connector
+ functions and a connector type, and exposed through sysfs with a call to
+ <function>drm_sysfs_connector_add</function>.
+ </para>
+ <para>
+ Supported connector types are
+ <itemizedlist>
+ <listitem>DRM_MODE_CONNECTOR_VGA</listitem>
+ <listitem>DRM_MODE_CONNECTOR_DVII</listitem>
+ <listitem>DRM_MODE_CONNECTOR_DVID</listitem>
+ <listitem>DRM_MODE_CONNECTOR_DVIA</listitem>
+ <listitem>DRM_MODE_CONNECTOR_Composite</listitem>
+ <listitem>DRM_MODE_CONNECTOR_SVIDEO</listitem>
+ <listitem>DRM_MODE_CONNECTOR_LVDS</listitem>
+ <listitem>DRM_MODE_CONNECTOR_Component</listitem>
+ <listitem>DRM_MODE_CONNECTOR_9PinDIN</listitem>
+ <listitem>DRM_MODE_CONNECTOR_DisplayPort</listitem>
+ <listitem>DRM_MODE_CONNECTOR_HDMIA</listitem>
+ <listitem>DRM_MODE_CONNECTOR_HDMIB</listitem>
+ <listitem>DRM_MODE_CONNECTOR_TV</listitem>
+ <listitem>DRM_MODE_CONNECTOR_eDP</listitem>
+ <listitem>DRM_MODE_CONNECTOR_VIRTUAL</listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ Connectors must be attached to an encoder to be used. For devices that
+ map connectors to encoders 1:1, the connector should be attached at
+ initialization time with a call to
+ <function>drm_mode_connector_attach_encoder</function>. The driver must
+ also set the <structname>drm_connector</structname>
+ <structfield>encoder</structfield> field to point to the attached
+ encoder.
+ </para>
+ <para>
+ Finally, drivers must initialize the connectors state change detection
+ with a call to <function>drm_kms_helper_poll_init</function>. If at
+ least one connector is pollable but can't generate hotplug interrupts
+ (indicated by the DRM_CONNECTOR_POLL_CONNECT and
+ DRM_CONNECTOR_POLL_DISCONNECT connector flags), a delayed work will
+ automatically be queued to periodically poll for changes. Connectors
+ that can generate hotplug interrupts must be marked with the
+ DRM_CONNECTOR_POLL_HPD flag instead, and their interrupt handler must
+ call <function>drm_helper_hpd_irq_event</function>. The function will
+ queue a delayed work to check the state of all connectors, but no
+ periodic polling will be done.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Connector Operations</title>
+ <note><para>
+ Unless otherwise state, all operations are mandatory.
+ </para></note>
+ <sect4>
+ <title>DPMS</title>
+ <synopsis>void (*dpms)(struct drm_connector *connector, int mode);</synopsis>
+ <para>
+ The DPMS operation sets the power state of a connector. The mode
+ argument is one of
+ <itemizedlist>
+ <listitem><para>DRM_MODE_DPMS_ON</para></listitem>
+ <listitem><para>DRM_MODE_DPMS_STANDBY</para></listitem>
+ <listitem><para>DRM_MODE_DPMS_SUSPEND</para></listitem>
+ <listitem><para>DRM_MODE_DPMS_OFF</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ In all but DPMS_ON mode the encoder to which the connector is attached
+ should put the display in low-power mode by driving its signals
+ appropriately. If more than one connector is attached to the encoder
+ care should be taken not to change the power state of other displays as
+ a side effect. Low-power mode should be propagated to the encoders and
+ CRTCs when all related connectors are put in low-power mode.
+ </para>
+ </sect4>
+ <sect4>
+ <title>Modes</title>
+ <synopsis>int (*fill_modes)(struct drm_connector *connector, uint32_t max_width,
+ uint32_t max_height);</synopsis>
+ <para>
+ Fill the mode list with all supported modes for the connector. If the
+ <parameter>max_width</parameter> and <parameter>max_height</parameter>
+ arguments are non-zero, the implementation must ignore all modes wider
+ than <parameter>max_width</parameter> or higher than
+ <parameter>max_height</parameter>.
+ </para>
+ <para>
+ The connector must also fill in this operation its
+ <structfield>display_info</structfield>
+ <structfield>width_mm</structfield> and
+ <structfield>height_mm</structfield> fields with the connected display
+ physical size in millimeters. The fields should be set to 0 if the value
+ isn't known or is not applicable (for instance for projector devices).
+ </para>
+ </sect4>
+ <sect4>
+ <title>Connection Status</title>
+ <para>
+ The connection status is updated through polling or hotplug events when
+ supported (see <xref linkend="drm-kms-connector-polled"/>). The status
+ value is reported to userspace through ioctls and must not be used
+ inside the driver, as it only gets initialized by a call to
+ <function>drm_mode_getconnector</function> from userspace.
+ </para>
+ <synopsis>enum drm_connector_status (*detect)(struct drm_connector *connector,
+ bool force);</synopsis>
+ <para>
+ Check to see if anything is attached to the connector. The
+ <parameter>force</parameter> parameter is set to false whilst polling or
+ to true when checking the connector due to user request.
+ <parameter>force</parameter> can be used by the driver to avoid
+ expensive, destructive operations during automated probing.
+ </para>
+ <para>
+ Return connector_status_connected if something is connected to the
+ connector, connector_status_disconnected if nothing is connected and
+ connector_status_unknown if the connection state isn't known.
+ </para>
+ <para>
+ Drivers should only return connector_status_connected if the connection
+ status has really been probed as connected. Connectors that can't detect
+ the connection status, or failed connection status probes, should return
+ connector_status_unknown.
+ </para>
+ </sect4>
+ <sect4>
+ <title>Miscellaneous</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_connector *connector);</synopsis>
+ <para>
+ Destroy the connector when not needed anymore. See
+ <xref linkend="drm-kms-init"/>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect4>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Cleanup</title>
+ <para>
+ The DRM core manages its objects' lifetime. When an object is not needed
+ anymore the core calls its destroy function, which must clean up and
+ free every resource allocated for the object. Every
+ <function>drm_*_init</function> call must be matched with a
+ corresponding <function>drm_*_cleanup</function> call to cleanup CRTCs
+ (<function>drm_crtc_cleanup</function>), planes
+ (<function>drm_plane_cleanup</function>), encoders
+ (<function>drm_encoder_cleanup</function>) and connectors
+ (<function>drm_connector_cleanup</function>). Furthermore, connectors
+ that have been added to sysfs must be removed by a call to
+ <function>drm_sysfs_connector_remove</function> before calling
+ <function>drm_connector_cleanup</function>.
+ </para>
+ <para>
+ Connectors state change detection must be cleanup up with a call to
+ <function>drm_kms_helper_poll_fini</function>.
+ </para>
+ </sect2>
+ <sect2>
+ <title>Output discovery and initialization example</title>
+ <programlisting><![CDATA[
+void intel_crt_init(struct drm_device *dev)
+{
+ struct drm_connector *connector;
+ struct intel_output *intel_output;
+
+ intel_output = kzalloc(sizeof(struct intel_output), GFP_KERNEL);
+ if (!intel_output)
+ return;
+
+ connector = &intel_output->base;
+ drm_connector_init(dev, &intel_output->base,
+ &intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
+
+ drm_encoder_init(dev, &intel_output->enc, &intel_crt_enc_funcs,
+ DRM_MODE_ENCODER_DAC);
+
+ drm_mode_connector_attach_encoder(&intel_output->base,
+ &intel_output->enc);
+
+ /* Set up the DDC bus. */
+ intel_output->ddc_bus = intel_i2c_create(dev, GPIOA, "CRTDDC_A");
+ if (!intel_output->ddc_bus) {
+ dev_printk(KERN_ERR, &dev->pdev->dev, "DDC bus registration "
+ "failed.\n");
+ return;
+ }
+
+ intel_output->type = INTEL_OUTPUT_ANALOG;
+ connector->interlace_allowed = 0;
+ connector->doublescan_allowed = 0;
+
+ drm_encoder_helper_add(&intel_output->enc, &intel_crt_helper_funcs);
+ drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs);
+
+ drm_sysfs_connector_add(connector);
+}]]></programlisting>
+ <para>
+ In the example above (taken from the i915 driver), a CRTC, connector and
+ encoder combination is created. A device-specific i2c bus is also
+ created for fetching EDID data and performing monitor detection. Once
+ the process is complete, the new connector is registered with sysfs to
+ make its properties available to applications.
+ </para>
+ </sect2>
+ <sect2>
+ <title>KMS API Functions</title>
+!Edrivers/gpu/drm/drm_crtc.c
+ </sect2>
+ </sect1>
+
+ <!-- Internals: kms helper functions -->
+
+ <sect1>
+ <title>Mode Setting Helper Functions</title>
+ <para>
+ The CRTC, encoder and connector functions provided by the drivers
+ implement the DRM API. They're called by the DRM core and ioctl handlers
+ to handle device state changes and configuration request. As implementing
+ those functions often requires logic not specific to drivers, mid-layer
+ helper functions are available to avoid duplicating boilerplate code.
+ </para>
+ <para>
+ The DRM core contains one mid-layer implementation. The mid-layer provides
+ implementations of several CRTC, encoder and connector functions (called
+ from the top of the mid-layer) that pre-process requests and call
+ lower-level functions provided by the driver (at the bottom of the
+ mid-layer). For instance, the
+ <function>drm_crtc_helper_set_config</function> function can be used to
+ fill the struct <structname>drm_crtc_funcs</structname>
+ <structfield>set_config</structfield> field. When called, it will split
+ the <methodname>set_config</methodname> operation in smaller, simpler
+ operations and call the driver to handle them.
+ </para>
+ <para>
+ To use the mid-layer, drivers call <function>drm_crtc_helper_add</function>,
+ <function>drm_encoder_helper_add</function> and
+ <function>drm_connector_helper_add</function> functions to install their
+ mid-layer bottom operations handlers, and fill the
+ <structname>drm_crtc_funcs</structname>,
+ <structname>drm_encoder_funcs</structname> and
+ <structname>drm_connector_funcs</structname> structures with pointers to
+ the mid-layer top API functions. Installing the mid-layer bottom operation
+ handlers is best done right after registering the corresponding KMS object.
+ </para>
+ <para>
+ The mid-layer is not split between CRTC, encoder and connector operations.
+ To use it, a driver must provide bottom functions for all of the three KMS
+ entities.
+ </para>
+ <sect2>
+ <title>Helper Functions</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int drm_crtc_helper_set_config(struct drm_mode_set *set);</synopsis>
+ <para>
+ The <function>drm_crtc_helper_set_config</function> helper function
+ is a CRTC <methodname>set_config</methodname> implementation. It
+ first tries to locate the best encoder for each connector by calling
+ the connector <methodname>best_encoder</methodname> helper
+ operation.
+ </para>
+ <para>
+ After locating the appropriate encoders, the helper function will
+ call the <methodname>mode_fixup</methodname> encoder and CRTC helper
+ operations to adjust the requested mode, or reject it completely in
+ which case an error will be returned to the application. If the new
+ configuration after mode adjustment is identical to the current
+ configuration the helper function will return without performing any
+ other operation.
+ </para>
+ <para>
+ If the adjusted mode is identical to the current mode but changes to
+ the frame buffer need to be applied, the
+ <function>drm_crtc_helper_set_config</function> function will call
+ the CRTC <methodname>mode_set_base</methodname> helper operation. If
+ the adjusted mode differs from the current mode, or if the
+ <methodname>mode_set_base</methodname> helper operation is not
+ provided, the helper function performs a full mode set sequence by
+ calling the <methodname>prepare</methodname>,
+ <methodname>mode_set</methodname> and
+ <methodname>commit</methodname> CRTC and encoder helper operations,
+ in that order.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void drm_helper_connector_dpms(struct drm_connector *connector, int mode);</synopsis>
+ <para>
+ The <function>drm_helper_connector_dpms</function> helper function
+ is a connector <methodname>dpms</methodname> implementation that
+ tracks power state of connectors. To use the function, drivers must
+ provide <methodname>dpms</methodname> helper operations for CRTCs
+ and encoders to apply the DPMS state to the device.
+ </para>
+ <para>
+ The mid-layer doesn't track the power state of CRTCs and encoders.
+ The <methodname>dpms</methodname> helper operations can thus be
+ called with a mode identical to the currently active mode.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
+ uint32_t maxX, uint32_t maxY);</synopsis>
+ <para>
+ The <function>drm_helper_probe_single_connector_modes</function> helper
+ function is a connector <methodname>fill_modes</methodname>
+ implementation that updates the connection status for the connector
+ and then retrieves a list of modes by calling the connector
+ <methodname>get_modes</methodname> helper operation.
+ </para>
+ <para>
+ The function filters out modes larger than
+ <parameter>max_width</parameter> and <parameter>max_height</parameter>
+ if specified. It then calls the connector
+ <methodname>mode_valid</methodname> helper operation for each mode in
+ the probed list to check whether the mode is valid for the connector.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ <sect2>
+ <title>CRTC Helper Operations</title>
+ <itemizedlist>
+ <listitem id="drm-helper-crtc-mode-fixup">
+ <synopsis>bool (*mode_fixup)(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);</synopsis>
+ <para>
+ Let CRTCs adjust the requested mode or reject it completely. This
+ operation returns true if the mode is accepted (possibly after being
+ adjusted) or false if it is rejected.
+ </para>
+ <para>
+ The <methodname>mode_fixup</methodname> operation should reject the
+ mode if it can't reasonably use it. The definition of "reasonable"
+ is currently fuzzy in this context. One possible behaviour would be
+ to set the adjusted mode to the panel timings when a fixed-mode
+ panel is used with hardware capable of scaling. Another behaviour
+ would be to accept any input mode and adjust it to the closest mode
+ supported by the hardware (FIXME: This needs to be clarified).
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*mode_set_base)(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *old_fb)</synopsis>
+ <para>
+ Move the CRTC on the current frame buffer (stored in
+ <literal>crtc-&gt;fb</literal>) to position (x,y). Any of the frame
+ buffer, x position or y position may have been modified.
+ </para>
+ <para>
+ This helper operation is optional. If not provided, the
+ <function>drm_crtc_helper_set_config</function> function will fall
+ back to the <methodname>mode_set</methodname> helper operation.
+ </para>
+ <note><para>
+ FIXME: Why are x and y passed as arguments, as they can be accessed
+ through <literal>crtc-&gt;x</literal> and
+ <literal>crtc-&gt;y</literal>?
+ </para></note>
+ </listitem>
+ <listitem>
+ <synopsis>void (*prepare)(struct drm_crtc *crtc);</synopsis>
+ <para>
+ Prepare the CRTC for mode setting. This operation is called after
+ validating the requested mode. Drivers use it to perform
+ device-specific operations required before setting the new mode.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode, int x, int y,
+ struct drm_framebuffer *old_fb);</synopsis>
+ <para>
+ Set a new mode, position and frame buffer. Depending on the device
+ requirements, the mode can be stored internally by the driver and
+ applied in the <methodname>commit</methodname> operation, or
+ programmed to the hardware immediately.
+ </para>
+ <para>
+ The <methodname>mode_set</methodname> operation returns 0 on success
+ or a negative error code if an error occurs.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*commit)(struct drm_crtc *crtc);</synopsis>
+ <para>
+ Commit a mode. This operation is called after setting the new mode.
+ Upon return the device must use the new mode and be fully
+ operational.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ <sect2>
+ <title>Encoder Helper Operations</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>bool (*mode_fixup)(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);</synopsis>
+ <note><para>
+ FIXME: The mode argument be const, but the i915 driver modifies
+ mode-&gt;clock in <function>intel_dp_mode_fixup</function>.
+ </para></note>
+ <para>
+ Let encoders adjust the requested mode or reject it completely. This
+ operation returns true if the mode is accepted (possibly after being
+ adjusted) or false if it is rejected. See the
+ <link linkend="drm-helper-crtc-mode-fixup">mode_fixup CRTC helper
+ operation</link> for an explanation of the allowed adjustments.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*prepare)(struct drm_encoder *encoder);</synopsis>
+ <para>
+ Prepare the encoder for mode setting. This operation is called after
+ validating the requested mode. Drivers use it to perform
+ device-specific operations required before setting the new mode.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*mode_set)(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);</synopsis>
+ <para>
+ Set a new mode. Depending on the device requirements, the mode can
+ be stored internally by the driver and applied in the
+ <methodname>commit</methodname> operation, or programmed to the
+ hardware immediately.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*commit)(struct drm_encoder *encoder);</synopsis>
+ <para>
+ Commit a mode. This operation is called after setting the new mode.
+ Upon return the device must use the new mode and be fully
+ operational.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ <sect2>
+ <title>Connector Helper Operations</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>struct drm_encoder *(*best_encoder)(struct drm_connector *connector);</synopsis>
+ <para>
+ Return a pointer to the best encoder for the connecter. Device that
+ map connectors to encoders 1:1 simply return the pointer to the
+ associated encoder. This operation is mandatory.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*get_modes)(struct drm_connector *connector);</synopsis>
+ <para>
+ Fill the connector's <structfield>probed_modes</structfield> list
+ by parsing EDID data with <function>drm_add_edid_modes</function> or
+ calling <function>drm_mode_probed_add</function> directly for every
+ supported mode and return the number of modes it has detected. This
+ operation is mandatory.
+ </para>
+ <para>
+ When adding modes manually the driver creates each mode with a call to
+ <function>drm_mode_create</function> and must fill the following fields.
+ <itemizedlist>
+ <listitem>
+ <synopsis>__u32 type;</synopsis>
+ <para>
+ Mode type bitmask, a combination of
+ <variablelist>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_BUILTIN</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_CLOCK_C</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_CRTC_C</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>
+ DRM_MODE_TYPE_PREFERRED - The preferred mode for the connector
+ </term>
+ <listitem>
+ <para>not used?</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_DEFAULT</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_USERDEF</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_DRIVER</term>
+ <listitem>
+ <para>
+ The mode has been created by the driver (as opposed to
+ to user-created modes).
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ Drivers must set the DRM_MODE_TYPE_DRIVER bit for all modes they
+ create, and set the DRM_MODE_TYPE_PREFERRED bit for the preferred
+ mode.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>__u32 clock;</synopsis>
+ <para>Pixel clock frequency in kHz unit</para>
+ </listitem>
+ <listitem>
+ <synopsis>__u16 hdisplay, hsync_start, hsync_end, htotal;
+ __u16 vdisplay, vsync_start, vsync_end, vtotal;</synopsis>
+ <para>Horizontal and vertical timing information</para>
+ <screen><![CDATA[
+ Active Front Sync Back
+ Region Porch Porch
+ <-----------------------><----------------><-------------><-------------->
+
+ //////////////////////|
+ ////////////////////// |
+ ////////////////////// |.................. ................
+ _______________
+
+ <----- [hv]display ----->
+ <------------- [hv]sync_start ------------>
+ <--------------------- [hv]sync_end --------------------->
+ <-------------------------------- [hv]total ----------------------------->
+]]></screen>
+ </listitem>
+ <listitem>
+ <synopsis>__u16 hskew;
+ __u16 vscan;</synopsis>
+ <para>Unknown</para>
+ </listitem>
+ <listitem>
+ <synopsis>__u32 flags;</synopsis>
+ <para>
+ Mode flags, a combination of
+ <variablelist>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_PHSYNC</term>
+ <listitem><para>
+ Horizontal sync is active high
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_NHSYNC</term>
+ <listitem><para>
+ Horizontal sync is active low
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_PVSYNC</term>
+ <listitem><para>
+ Vertical sync is active high
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_NVSYNC</term>
+ <listitem><para>
+ Vertical sync is active low
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_INTERLACE</term>
+ <listitem><para>
+ Mode is interlaced
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_DBLSCAN</term>
+ <listitem><para>
+ Mode uses doublescan
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_CSYNC</term>
+ <listitem><para>
+ Mode uses composite sync
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_PCSYNC</term>
+ <listitem><para>
+ Composite sync is active high
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_NCSYNC</term>
+ <listitem><para>
+ Composite sync is active low
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_HSKEW</term>
+ <listitem><para>
+ hskew provided (not used?)
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_BCAST</term>
+ <listitem><para>
+ not used?
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_PIXMUX</term>
+ <listitem><para>
+ not used?
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_DBLCLK</term>
+ <listitem><para>
+ not used?
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_CLKDIV2</term>
+ <listitem><para>
+ ?
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+ <para>
+ Note that modes marked with the INTERLACE or DBLSCAN flags will be
+ filtered out by
+ <function>drm_helper_probe_single_connector_modes</function> if
+ the connector's <structfield>interlace_allowed</structfield> or
+ <structfield>doublescan_allowed</structfield> field is set to 0.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>char name[DRM_DISPLAY_MODE_LEN];</synopsis>
+ <para>
+ Mode name. The driver must call
+ <function>drm_mode_set_name</function> to fill the mode name from
+ <structfield>hdisplay</structfield>,
+ <structfield>vdisplay</structfield> and interlace flag after
+ filling the corresponding fields.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The <structfield>vrefresh</structfield> value is computed by
+ <function>drm_helper_probe_single_connector_modes</function>.
+ </para>
+ <para>
+ When parsing EDID data, <function>drm_add_edid_modes</function> fill the
+ connector <structfield>display_info</structfield>
+ <structfield>width_mm</structfield> and
+ <structfield>height_mm</structfield> fields. When creating modes
+ manually the <methodname>get_modes</methodname> helper operation must
+ set the <structfield>display_info</structfield>
+ <structfield>width_mm</structfield> and
+ <structfield>height_mm</structfield> fields if they haven't been set
+ already (for instance at initilization time when a fixed-size panel is
+ attached to the connector). The mode <structfield>width_mm</structfield>
+ and <structfield>height_mm</structfield> fields are only used internally
+ during EDID parsing and should not be set when creating modes manually.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*mode_valid)(struct drm_connector *connector,
+ struct drm_display_mode *mode);</synopsis>
+ <para>
+ Verify whether a mode is valid for the connector. Return MODE_OK for
+ supported modes and one of the enum drm_mode_status values (MODE_*)
+ for unsupported modes. This operation is mandatory.
+ </para>
+ <para>
+ As the mode rejection reason is currently not used beside for
+ immediately removing the unsupported mode, an implementation can
+ return MODE_BAD regardless of the exact reason why the mode is not
+ valid.
+ </para>
+ <note><para>
+ Note that the <methodname>mode_valid</methodname> helper operation is
+ only called for modes detected by the device, and
+ <emphasis>not</emphasis> for modes set by the user through the CRTC
+ <methodname>set_config</methodname> operation.
+ </para></note>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ <sect2>
+ <title>Modeset Helper Functions Reference</title>
+!Edrivers/gpu/drm/drm_crtc_helper.c
+ </sect2>
+ <sect2>
+ <title>fbdev Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_fb_helper.c fbdev helpers
+!Edrivers/gpu/drm/drm_fb_helper.c
+!Iinclude/drm/drm_fb_helper.h
+ </sect2>
+ <sect2>
+ <title>Display Port Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_dp_helper.c dp helpers
+!Iinclude/drm/drm_dp_helper.h
+!Edrivers/gpu/drm/drm_dp_helper.c
+ </sect2>
+ <sect2>
+ <title>EDID Helper Functions Reference</title>
+!Edrivers/gpu/drm/drm_edid.c
+ </sect2>
+ </sect1>
+
+ <!-- Internals: vertical blanking -->
+
+ <sect1 id="drm-vertical-blank">
+ <title>Vertical Blanking</title>
+ <para>
+ Vertical blanking plays a major role in graphics rendering. To achieve
+ tear-free display, users must synchronize page flips and/or rendering to
+ vertical blanking. The DRM API offers ioctls to perform page flips
+ synchronized to vertical blanking and wait for vertical blanking.
+ </para>
+ <para>
+ The DRM core handles most of the vertical blanking management logic, which
+ involves filtering out spurious interrupts, keeping race-free blanking
+ counters, coping with counter wrap-around and resets and keeping use
+ counts. It relies on the driver to generate vertical blanking interrupts
+ and optionally provide a hardware vertical blanking counter. Drivers must
+ implement the following operations.
+ </para>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int (*enable_vblank) (struct drm_device *dev, int crtc);
+void (*disable_vblank) (struct drm_device *dev, int crtc);</synopsis>
+ <para>
+ Enable or disable vertical blanking interrupts for the given CRTC.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>u32 (*get_vblank_counter) (struct drm_device *dev, int crtc);</synopsis>
+ <para>
+ Retrieve the value of the vertical blanking counter for the given
+ CRTC. If the hardware maintains a vertical blanking counter its value
+ should be returned. Otherwise drivers can use the
+ <function>drm_vblank_count</function> helper function to handle this
+ operation.
+ </para>
+ </listitem>
+ </itemizedlist>
+ <para>
+ Drivers must initialize the vertical blanking handling core with a call to
+ <function>drm_vblank_init</function> in their
+ <methodname>load</methodname> operation. The function will set the struct
+ <structname>drm_device</structname>
+ <structfield>vblank_disable_allowed</structfield> field to 0. This will
+ keep vertical blanking interrupts enabled permanently until the first mode
+ set operation, where <structfield>vblank_disable_allowed</structfield> is
+ set to 1. The reason behind this is not clear. Drivers can set the field
+ to 1 after <function>calling drm_vblank_init</function> to make vertical
+ blanking interrupts dynamically managed from the beginning.
+ </para>
+ <para>
+ Vertical blanking interrupts can be enabled by the DRM core or by drivers
+ themselves (for instance to handle page flipping operations). The DRM core
+ maintains a vertical blanking use count to ensure that the interrupts are
+ not disabled while a user still needs them. To increment the use count,
+ drivers call <function>drm_vblank_get</function>. Upon return vertical
+ blanking interrupts are guaranteed to be enabled.
+ </para>
+ <para>
+ To decrement the use count drivers call
+ <function>drm_vblank_put</function>. Only when the use count drops to zero
+ will the DRM core disable the vertical blanking interrupts after a delay
+ by scheduling a timer. The delay is accessible through the vblankoffdelay
+ module parameter or the <varname>drm_vblank_offdelay</varname> global
+ variable and expressed in milliseconds. Its default value is 5000 ms.
+ </para>
+ <para>
+ When a vertical blanking interrupt occurs drivers only need to call the
+ <function>drm_handle_vblank</function> function to account for the
+ interrupt.
+ </para>
+ <para>
+ Resources allocated by <function>drm_vblank_init</function> must be freed
+ with a call to <function>drm_vblank_cleanup</function> in the driver
+ <methodname>unload</methodname> operation handler.
+ </para>
+ </sect1>
+
+ <!-- Internals: open/close, file operations and ioctls -->
+
+ <sect1>
+ <title>Open/Close, File Operations and IOCTLs</title>
+ <sect2>
+ <title>Open and Close</title>
+ <synopsis>int (*firstopen) (struct drm_device *);
+void (*lastclose) (struct drm_device *);
+int (*open) (struct drm_device *, struct drm_file *);
+void (*preclose) (struct drm_device *, struct drm_file *);
+void (*postclose) (struct drm_device *, struct drm_file *);</synopsis>
+ <abstract>Open and close handlers. None of those methods are mandatory.
+ </abstract>
+ <para>
+ The <methodname>firstopen</methodname> method is called by the DRM core
+ when an application opens a device that has no other opened file handle.
+ Similarly the <methodname>lastclose</methodname> method is called when
+ the last application holding a file handle opened on the device closes
+ it. Both methods are mostly used for UMS (User Mode Setting) drivers to
+ acquire and release device resources which should be done in the
+ <methodname>load</methodname> and <methodname>unload</methodname>
+ methods for KMS drivers.
+ </para>
+ <para>
+ Note that the <methodname>lastclose</methodname> method is also called
+ at module unload time or, for hot-pluggable devices, when the device is
+ unplugged. The <methodname>firstopen</methodname> and
+ <methodname>lastclose</methodname> calls can thus be unbalanced.
+ </para>
+ <para>
+ The <methodname>open</methodname> method is called every time the device
+ is opened by an application. Drivers can allocate per-file private data
+ in this method and store them in the struct
+ <structname>drm_file</structname> <structfield>driver_priv</structfield>
+ field. Note that the <methodname>open</methodname> method is called
+ before <methodname>firstopen</methodname>.
+ </para>
+ <para>
+ The close operation is split into <methodname>preclose</methodname> and
+ <methodname>postclose</methodname> methods. Drivers must stop and
+ cleanup all per-file operations in the <methodname>preclose</methodname>
+ method. For instance pending vertical blanking and page flip events must
+ be cancelled. No per-file operation is allowed on the file handle after
+ returning from the <methodname>preclose</methodname> method.
+ </para>
+ <para>
+ Finally the <methodname>postclose</methodname> method is called as the
+ last step of the close operation, right before calling the
+ <methodname>lastclose</methodname> method if no other open file handle
+ exists for the device. Drivers that have allocated per-file private data
+ in the <methodname>open</methodname> method should free it here.
+ </para>
+ <para>
+ The <methodname>lastclose</methodname> method should restore CRTC and
+ plane properties to default value, so that a subsequent open of the
+ device will not inherit state from the previous user.
+ </para>
+ </sect2>
+ <sect2>
+ <title>File Operations</title>
+ <synopsis>const struct file_operations *fops</synopsis>
+ <abstract>File operations for the DRM device node.</abstract>
+ <para>
+ Drivers must define the file operations structure that forms the DRM
+ userspace API entry point, even though most of those operations are
+ implemented in the DRM core. The <methodname>open</methodname>,
+ <methodname>release</methodname> and <methodname>ioctl</methodname>
+ operations are handled by
+ <programlisting>
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ #ifdef CONFIG_COMPAT
+ .compat_ioctl = drm_compat_ioctl,
+ #endif
+ </programlisting>
+ </para>
+ <para>
+ Drivers that implement private ioctls that requires 32/64bit
+ compatibility support must provide their own
+ <methodname>compat_ioctl</methodname> handler that processes private
+ ioctls and calls <function>drm_compat_ioctl</function> for core ioctls.
+ </para>
+ <para>
+ The <methodname>read</methodname> and <methodname>poll</methodname>
+ operations provide support for reading DRM events and polling them. They
+ are implemented by
+ <programlisting>
+ .poll = drm_poll,
+ .read = drm_read,
+ .fasync = drm_fasync,
+ .llseek = no_llseek,
+ </programlisting>
+ </para>
+ <para>
+ The memory mapping implementation varies depending on how the driver
+ manages memory. Pre-GEM drivers will use <function>drm_mmap</function>,
+ while GEM-aware drivers will use <function>drm_gem_mmap</function>. See
+ <xref linkend="drm-gem"/>.
+ <programlisting>
+ .mmap = drm_gem_mmap,
+ </programlisting>
+ </para>
+ <para>
+ No other file operation is supported by the DRM API.
+ </para>
+ </sect2>
+ <sect2>
+ <title>IOCTLs</title>
+ <synopsis>struct drm_ioctl_desc *ioctls;
+int num_ioctls;</synopsis>
+ <abstract>Driver-specific ioctls descriptors table.</abstract>
+ <para>
+ Driver-specific ioctls numbers start at DRM_COMMAND_BASE. The ioctls
+ descriptors table is indexed by the ioctl number offset from the base
+ value. Drivers can use the DRM_IOCTL_DEF_DRV() macro to initialize the
+ table entries.
+ </para>
+ <para>
+ <programlisting>DRM_IOCTL_DEF_DRV(ioctl, func, flags)</programlisting>
+ <para>
+ <parameter>ioctl</parameter> is the ioctl name. Drivers must define
+ the DRM_##ioctl and DRM_IOCTL_##ioctl macros to the ioctl number
+ offset from DRM_COMMAND_BASE and the ioctl number respectively. The
+ first macro is private to the device while the second must be exposed
+ to userspace in a public header.
+ </para>
+ <para>
+ <parameter>func</parameter> is a pointer to the ioctl handler function
+ compatible with the <type>drm_ioctl_t</type> type.
+ <programlisting>typedef int drm_ioctl_t(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);</programlisting>
+ </para>
+ <para>
+ <parameter>flags</parameter> is a bitmask combination of the following
+ values. It restricts how the ioctl is allowed to be called.
+ <itemizedlist>
+ <listitem><para>
+ DRM_AUTH - Only authenticated callers allowed
+ </para></listitem>
+ <listitem><para>
+ DRM_MASTER - The ioctl can only be called on the master file
+ handle
+ </para></listitem>
+ <listitem><para>
+ DRM_ROOT_ONLY - Only callers with the SYSADMIN capability allowed
+ </para></listitem>
+ <listitem><para>
+ DRM_CONTROL_ALLOW - The ioctl can only be called on a control
+ device
+ </para></listitem>
+ <listitem><para>
+ DRM_UNLOCKED - The ioctl handler will be called without locking
+ the DRM global mutex
+ </para></listitem>
+ </itemizedlist>
+ </para>
+ </para>
+ </sect2>
+ </sect1>
+
+ <sect1>
+ <title>Command submission &amp; fencing</title>
+ <para>
+ This should cover a few device-specific command submission
+ implementations.
+ </para>
+ </sect1>
+
+ <!-- Internals: suspend/resume -->
+
+ <sect1>
+ <title>Suspend/Resume</title>
+ <para>
+ The DRM core provides some suspend/resume code, but drivers wanting full
+ suspend/resume support should provide save() and restore() functions.
+ These are called at suspend, hibernate, or resume time, and should perform
+ any state save or restore required by your device across suspend or
+ hibernate states.
+ </para>
+ <synopsis>int (*suspend) (struct drm_device *, pm_message_t state);
+int (*resume) (struct drm_device *);</synopsis>
+ <para>
+ Those are legacy suspend and resume methods. New driver should use the
+ power management interface provided by their bus type (usually through
+ the struct <structname>device_driver</structname> dev_pm_ops) and set
+ these methods to NULL.
+ </para>
+ </sect1>
+
+ <sect1>
+ <title>DMA services</title>
+ <para>
+ This should cover how DMA mapping etc. is supported by the core.
+ These functions are deprecated and should not be used.
+ </para>
+ </sect1>
+ </chapter>
+
+<!-- TODO
+
+- Add a glossary
+- Document the struct_mutex catch-all lock
+- Document connector properties
+
+- Why is the load method optional?
+- What are drivers supposed to set the initial display state to, and how?
+ Connector's DPMS states are not initialized and are thus equal to
+ DRM_MODE_DPMS_ON. The fbcon compatibility layer calls
+ drm_helper_disable_unused_functions(), which disables unused encoders and
+ CRTCs, but doesn't touch the connectors' DPMS state, and
+ drm_helper_connector_dpms() in reaction to fbdev blanking events. Do drivers
+ that don't implement (or just don't use) fbcon compatibility need to call
+ those functions themselves?
+- KMS drivers must call drm_vblank_pre_modeset() and drm_vblank_post_modeset()
+ around mode setting. Should this be done in the DRM core?
+- vblank_disable_allowed is set to 1 in the first drm_vblank_post_modeset()
+ call and never set back to 0. It seems to be safe to permanently set it to 1
+ in drm_vblank_init() for KMS driver, and it might be safe for UMS drivers as
+ well. This should be investigated.
+- crtc and connector .save and .restore operations are only used internally in
+ drivers, should they be removed from the core?
+- encoder mid-layer .save and .restore operations are only used internally in
+ drivers, should they be removed from the core?
+- encoder mid-layer .detect operation is only used internally in drivers,
+ should it be removed from the core?
+-->
+
+ <!-- External interfaces -->
+
+ <chapter id="drmExternals">
+ <title>Userland interfaces</title>
+ <para>
+ The DRM core exports several interfaces to applications,
+ generally intended to be used through corresponding libdrm
+ wrapper functions. In addition, drivers export device-specific
+ interfaces for use by userspace drivers &amp; device-aware
+ applications through ioctls and sysfs files.
+ </para>
+ <para>
+ External interfaces include: memory mapping, context management,
+ DMA operations, AGP management, vblank control, fence
+ management, memory management, and output management.
+ </para>
+ <para>
+ Cover generic ioctls and sysfs layout here. We only need high-level
+ info, since man pages should cover the rest.
+ </para>
+
+ <!-- External: vblank handling -->
+
+ <sect1>
+ <title>VBlank event handling</title>
+ <para>
+ The DRM core exposes two vertical blank related ioctls:
+ <variablelist>
+ <varlistentry>
+ <term>DRM_IOCTL_WAIT_VBLANK</term>
+ <listitem>
+ <para>
+ This takes a struct drm_wait_vblank structure as its argument,
+ and it is used to block or request a signal when a specified
+ vblank event occurs.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_IOCTL_MODESET_CTL</term>
+ <listitem>
+ <para>
+ This should be called by application level drivers before and
+ after mode setting, since on many devices the vertical blank
+ counter is reset at that time. Internally, the DRM snapshots
+ the last vblank count when the ioctl is called with the
+ _DRM_PRE_MODESET command, so that the counter won't go backwards
+ (which is dealt with when _DRM_POST_MODESET is used).
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+<!--!Edrivers/char/drm/drm_irq.c-->
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <!-- API reference -->
+
+ <appendix id="drmDriverApi">
+ <title>DRM Driver API</title>
+ <para>
+ Include auto-generated API reference here (need to reference it
+ from paragraphs above too).
+ </para>
+ </appendix>
+
+</book>
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
new file mode 100644
index 00000000..25b58efd
--- /dev/null
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -0,0 +1,426 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Linux-filesystems-API">
+ <bookinfo>
+ <title>Linux Filesystems API</title>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="vfs">
+ <title>The Linux VFS</title>
+ <sect1 id="the_filesystem_types"><title>The Filesystem types</title>
+!Iinclude/linux/fs.h
+ </sect1>
+ <sect1 id="the_directory_cache"><title>The Directory Cache</title>
+!Efs/dcache.c
+!Iinclude/linux/dcache.h
+ </sect1>
+ <sect1 id="inode_handling"><title>Inode Handling</title>
+!Efs/inode.c
+!Efs/bad_inode.c
+ </sect1>
+ <sect1 id="registration_and_superblocks"><title>Registration and Superblocks</title>
+!Efs/super.c
+ </sect1>
+ <sect1 id="file_locks"><title>File Locks</title>
+!Efs/locks.c
+!Ifs/locks.c
+ </sect1>
+ <sect1 id="other_functions"><title>Other Functions</title>
+!Efs/mpage.c
+!Efs/namei.c
+!Efs/buffer.c
+!Efs/bio.c
+!Efs/seq_file.c
+!Efs/filesystems.c
+!Efs/fs-writeback.c
+!Efs/block_dev.c
+ </sect1>
+ </chapter>
+
+ <chapter id="proc">
+ <title>The proc filesystem</title>
+
+ <sect1 id="sysctl_interface"><title>sysctl interface</title>
+!Ekernel/sysctl.c
+ </sect1>
+
+ <sect1 id="proc_filesystem_interface"><title>proc filesystem interface</title>
+!Ifs/proc/base.c
+ </sect1>
+ </chapter>
+
+ <chapter id="fs_events">
+ <title>Events based on file descriptors</title>
+!Efs/eventfd.c
+ </chapter>
+
+ <chapter id="sysfs">
+ <title>The Filesystem for Exporting Kernel Objects</title>
+!Efs/sysfs/file.c
+!Efs/sysfs/symlink.c
+!Efs/sysfs/bin.c
+ </chapter>
+
+ <chapter id="debugfs">
+ <title>The debugfs filesystem</title>
+
+ <sect1 id="debugfs_interface"><title>debugfs interface</title>
+!Efs/debugfs/inode.c
+!Efs/debugfs/file.c
+ </sect1>
+ </chapter>
+
+ <chapter id="LinuxJDBAPI">
+ <chapterinfo>
+ <title>The Linux Journalling API</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Roger</firstname>
+ <surname>Gammans</surname>
+ <affiliation>
+ <address>
+ <email>rgammans@computer-surgery.co.uk</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <authorgroup>
+ <author>
+ <firstname>Stephen</firstname>
+ <surname>Tweedie</surname>
+ <affiliation>
+ <address>
+ <email>sct@redhat.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2002</year>
+ <holder>Roger Gammans</holder>
+ </copyright>
+ </chapterinfo>
+
+ <title>The Linux Journalling API</title>
+
+ <sect1 id="journaling_overview">
+ <title>Overview</title>
+ <sect2 id="journaling_details">
+ <title>Details</title>
+<para>
+The journalling layer is easy to use. You need to
+first of all create a journal_t data structure. There are
+two calls to do this dependent on how you decide to allocate the physical
+media on which the journal resides. The journal_init_inode() call
+is for journals stored in filesystem inodes, or the journal_init_dev()
+call can be use for journal stored on a raw device (in a continuous range
+of blocks). A journal_t is a typedef for a struct pointer, so when
+you are finally finished make sure you call journal_destroy() on it
+to free up any used kernel memory.
+</para>
+
+<para>
+Once you have got your journal_t object you need to 'mount' or load the journal
+file, unless of course you haven't initialised it yet - in which case you
+need to call journal_create().
+</para>
+
+<para>
+Most of the time however your journal file will already have been created, but
+before you load it you must call journal_wipe() to empty the journal file.
+Hang on, you say , what if the filesystem wasn't cleanly umount()'d . Well, it is the
+job of the client file system to detect this and skip the call to journal_wipe().
+</para>
+
+<para>
+In either case the next call should be to journal_load() which prepares the
+journal file for use. Note that journal_wipe(..,0) calls journal_skip_recovery()
+for you if it detects any outstanding transactions in the journal and similarly
+journal_load() will call journal_recover() if necessary.
+I would advise reading fs/ext3/super.c for examples on this stage.
+[RGG: Why is the journal_wipe() call necessary - doesn't this needlessly
+complicate the API. Or isn't a good idea for the journal layer to hide
+dirty mounts from the client fs]
+</para>
+
+<para>
+Now you can go ahead and start modifying the underlying
+filesystem. Almost.
+</para>
+
+<para>
+
+You still need to actually journal your filesystem changes, this
+is done by wrapping them into transactions. Additionally you
+also need to wrap the modification of each of the buffers
+with calls to the journal layer, so it knows what the modifications
+you are actually making are. To do this use journal_start() which
+returns a transaction handle.
+</para>
+
+<para>
+journal_start()
+and its counterpart journal_stop(), which indicates the end of a transaction
+are nestable calls, so you can reenter a transaction if necessary,
+but remember you must call journal_stop() the same number of times as
+journal_start() before the transaction is completed (or more accurately
+leaves the update phase). Ext3/VFS makes use of this feature to simplify
+quota support.
+</para>
+
+<para>
+Inside each transaction you need to wrap the modifications to the
+individual buffers (blocks). Before you start to modify a buffer you
+need to call journal_get_{create,write,undo}_access() as appropriate,
+this allows the journalling layer to copy the unmodified data if it
+needs to. After all the buffer may be part of a previously uncommitted
+transaction.
+At this point you are at last ready to modify a buffer, and once
+you are have done so you need to call journal_dirty_{meta,}data().
+Or if you've asked for access to a buffer you now know is now longer
+required to be pushed back on the device you can call journal_forget()
+in much the same way as you might have used bforget() in the past.
+</para>
+
+<para>
+A journal_flush() may be called at any time to commit and checkpoint
+all your transactions.
+</para>
+
+<para>
+Then at umount time , in your put_super() you can then call journal_destroy()
+to clean up your in-core journal object.
+</para>
+
+<para>
+Unfortunately there a couple of ways the journal layer can cause a deadlock.
+The first thing to note is that each task can only have
+a single outstanding transaction at any one time, remember nothing
+commits until the outermost journal_stop(). This means
+you must complete the transaction at the end of each file/inode/address
+etc. operation you perform, so that the journalling system isn't re-entered
+on another journal. Since transactions can't be nested/batched
+across differing journals, and another filesystem other than
+yours (say ext3) may be modified in a later syscall.
+</para>
+
+<para>
+The second case to bear in mind is that journal_start() can
+block if there isn't enough space in the journal for your transaction
+(based on the passed nblocks param) - when it blocks it merely(!) needs to
+wait for transactions to complete and be committed from other tasks,
+so essentially we are waiting for journal_stop(). So to avoid
+deadlocks you must treat journal_start/stop() as if they
+were semaphores and include them in your semaphore ordering rules to prevent
+deadlocks. Note that journal_extend() has similar blocking behaviour to
+journal_start() so you can deadlock here just as easily as on journal_start().
+</para>
+
+<para>
+Try to reserve the right number of blocks the first time. ;-). This will
+be the maximum number of blocks you are going to touch in this transaction.
+I advise having a look at at least ext3_jbd.h to see the basis on which
+ext3 uses to make these decisions.
+</para>
+
+<para>
+Another wriggle to watch out for is your on-disk block allocation strategy.
+why? Because, if you undo a delete, you need to ensure you haven't reused any
+of the freed blocks in a later transaction. One simple way of doing this
+is make sure any blocks you allocate only have checkpointed transactions
+listed against them. Ext3 does this in ext3_test_allocatable().
+</para>
+
+<para>
+Lock is also providing through journal_{un,}lock_updates(),
+ext3 uses this when it wants a window with a clean and stable fs for a moment.
+eg.
+</para>
+
+<programlisting>
+
+ journal_lock_updates() //stop new stuff happening..
+ journal_flush() // checkpoint everything.
+ ..do stuff on stable fs
+ journal_unlock_updates() // carry on with filesystem use.
+</programlisting>
+
+<para>
+The opportunities for abuse and DOS attacks with this should be obvious,
+if you allow unprivileged userspace to trigger codepaths containing these
+calls.
+</para>
+
+<para>
+A new feature of jbd since 2.5.25 is commit callbacks with the new
+journal_callback_set() function you can now ask the journalling layer
+to call you back when the transaction is finally committed to disk, so that
+you can do some of your own management. The key to this is the journal_callback
+struct, this maintains the internal callback information but you can
+extend it like this:-
+</para>
+<programlisting>
+ struct myfs_callback_s {
+ //Data structure element required by jbd..
+ struct journal_callback for_jbd;
+ // Stuff for myfs allocated together.
+ myfs_inode* i_commited;
+
+ }
+</programlisting>
+
+<para>
+this would be useful if you needed to know when data was committed to a
+particular inode.
+</para>
+
+ </sect2>
+
+ <sect2 id="jbd_summary">
+ <title>Summary</title>
+<para>
+Using the journal is a matter of wrapping the different context changes,
+being each mount, each modification (transaction) and each changed buffer
+to tell the journalling layer about them.
+</para>
+
+<para>
+Here is a some pseudo code to give you an idea of how it works, as
+an example.
+</para>
+
+<programlisting>
+ journal_t* my_jnrl = journal_create();
+ journal_init_{dev,inode}(jnrl,...)
+ if (clean) journal_wipe();
+ journal_load();
+
+ foreach(transaction) { /*transactions must be
+ completed before
+ a syscall returns to
+ userspace*/
+
+ handle_t * xct=journal_start(my_jnrl);
+ foreach(bh) {
+ journal_get_{create,write,undo}_access(xact,bh);
+ if ( myfs_modify(bh) ) { /* returns true
+ if makes changes */
+ journal_dirty_{meta,}data(xact,bh);
+ } else {
+ journal_forget(bh);
+ }
+ }
+ journal_stop(xct);
+ }
+ journal_destroy(my_jrnl);
+</programlisting>
+ </sect2>
+
+ </sect1>
+
+ <sect1 id="data_types">
+ <title>Data Types</title>
+ <para>
+ The journalling layer uses typedefs to 'hide' the concrete definitions
+ of the structures used. As a client of the JBD layer you can
+ just rely on the using the pointer as a magic cookie of some sort.
+
+ Obviously the hiding is not enforced as this is 'C'.
+ </para>
+ <sect2 id="structures"><title>Structures</title>
+!Iinclude/linux/jbd.h
+ </sect2>
+ </sect1>
+
+ <sect1 id="functions">
+ <title>Functions</title>
+ <para>
+ The functions here are split into two groups those that
+ affect a journal as a whole, and those which are used to
+ manage transactions
+ </para>
+ <sect2 id="journal_level"><title>Journal Level</title>
+!Efs/jbd/journal.c
+!Ifs/jbd/recovery.c
+ </sect2>
+ <sect2 id="transaction_level"><title>Transasction Level</title>
+!Efs/jbd/transaction.c
+ </sect2>
+ </sect1>
+ <sect1 id="see_also">
+ <title>See also</title>
+ <para>
+ <citation>
+ <ulink url="http://kernel.org/pub/linux/kernel/people/sct/ext3/journal-design.ps.gz">
+ Journaling the Linux ext2fs Filesystem, LinuxExpo 98, Stephen Tweedie
+ </ulink>
+ </citation>
+ </para>
+ <para>
+ <citation>
+ <ulink url="http://olstrans.sourceforge.net/release/OLS2000-ext3/OLS2000-ext3.html">
+ Ext3 Journalling FileSystem, OLS 2000, Dr. Stephen Tweedie
+ </ulink>
+ </citation>
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="splice">
+ <title>splice API</title>
+ <para>
+ splice is a method for moving blocks of data around inside the
+ kernel, without continually transferring them between the kernel
+ and user space.
+ </para>
+!Ffs/splice.c
+ </chapter>
+
+ <chapter id="pipes">
+ <title>pipes API</title>
+ <para>
+ Pipe interfaces are all for in-kernel (builtin image) use.
+ They are not exported for use by modules.
+ </para>
+!Iinclude/linux/pipe_fs_i.h
+!Ffs/pipe.c
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/gadget.tmpl b/Documentation/DocBook/gadget.tmpl
new file mode 100644
index 00000000..4017f147
--- /dev/null
+++ b/Documentation/DocBook/gadget.tmpl
@@ -0,0 +1,793 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="USB-Gadget-API">
+ <bookinfo>
+ <title>USB Gadget API for Linux</title>
+ <date>20 August 2004</date>
+ <edition>20 August 2004</edition>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ <copyright>
+ <year>2003-2004</year>
+ <holder>David Brownell</holder>
+ </copyright>
+
+ <author>
+ <firstname>David</firstname>
+ <surname>Brownell</surname>
+ <affiliation>
+ <address><email>dbrownell@users.sourceforge.net</email></address>
+ </affiliation>
+ </author>
+ </bookinfo>
+
+<toc></toc>
+
+<chapter id="intro"><title>Introduction</title>
+
+<para>This document presents a Linux-USB "Gadget"
+kernel mode
+API, for use within peripherals and other USB devices
+that embed Linux.
+It provides an overview of the API structure,
+and shows how that fits into a system development project.
+This is the first such API released on Linux to address
+a number of important problems, including: </para>
+
+<itemizedlist>
+ <listitem><para>Supports USB 2.0, for high speed devices which
+ can stream data at several dozen megabytes per second.
+ </para></listitem>
+ <listitem><para>Handles devices with dozens of endpoints just as
+ well as ones with just two fixed-function ones. Gadget drivers
+ can be written so they're easy to port to new hardware.
+ </para></listitem>
+ <listitem><para>Flexible enough to expose more complex USB device
+ capabilities such as multiple configurations, multiple interfaces,
+ composite devices,
+ and alternate interface settings.
+ </para></listitem>
+ <listitem><para>USB "On-The-Go" (OTG) support, in conjunction
+ with updates to the Linux-USB host side.
+ </para></listitem>
+ <listitem><para>Sharing data structures and API models with the
+ Linux-USB host side API. This helps the OTG support, and
+ looks forward to more-symmetric frameworks (where the same
+ I/O model is used by both host and device side drivers).
+ </para></listitem>
+ <listitem><para>Minimalist, so it's easier to support new device
+ controller hardware. I/O processing doesn't imply large
+ demands for memory or CPU resources.
+ </para></listitem>
+</itemizedlist>
+
+
+<para>Most Linux developers will not be able to use this API, since they
+have USB "host" hardware in a PC, workstation, or server.
+Linux users with embedded systems are more likely to
+have USB peripheral hardware.
+To distinguish drivers running inside such hardware from the
+more familiar Linux "USB device drivers",
+which are host side proxies for the real USB devices,
+a different term is used:
+the drivers inside the peripherals are "USB gadget drivers".
+In USB protocol interactions, the device driver is the master
+(or "client driver")
+and the gadget driver is the slave (or "function driver").
+</para>
+
+<para>The gadget API resembles the host side Linux-USB API in that both
+use queues of request objects to package I/O buffers, and those requests
+may be submitted or canceled.
+They share common definitions for the standard USB
+<emphasis>Chapter 9</emphasis> messages, structures, and constants.
+Also, both APIs bind and unbind drivers to devices.
+The APIs differ in detail, since the host side's current
+URB framework exposes a number of implementation details
+and assumptions that are inappropriate for a gadget API.
+While the model for control transfers and configuration
+management is necessarily different (one side is a hardware-neutral master,
+the other is a hardware-aware slave), the endpoint I/0 API used here
+should also be usable for an overhead-reduced host side API.
+</para>
+
+</chapter>
+
+<chapter id="structure"><title>Structure of Gadget Drivers</title>
+
+<para>A system running inside a USB peripheral
+normally has at least three layers inside the kernel to handle
+USB protocol processing, and may have additional layers in
+user space code.
+The "gadget" API is used by the middle layer to interact
+with the lowest level (which directly handles hardware).
+</para>
+
+<para>In Linux, from the bottom up, these layers are:
+</para>
+
+<variablelist>
+
+ <varlistentry>
+ <term><emphasis>USB Controller Driver</emphasis></term>
+
+ <listitem>
+ <para>This is the lowest software level.
+ It is the only layer that talks to hardware,
+ through registers, fifos, dma, irqs, and the like.
+ The <filename>&lt;linux/usb/gadget.h&gt;</filename> API abstracts
+ the peripheral controller endpoint hardware.
+ That hardware is exposed through endpoint objects, which accept
+ streams of IN/OUT buffers, and through callbacks that interact
+ with gadget drivers.
+ Since normal USB devices only have one upstream
+ port, they only have one of these drivers.
+ The controller driver can support any number of different
+ gadget drivers, but only one of them can be used at a time.
+ </para>
+
+ <para>Examples of such controller hardware include
+ the PCI-based NetChip 2280 USB 2.0 high speed controller,
+ the SA-11x0 or PXA-25x UDC (found within many PDAs),
+ and a variety of other products.
+ </para>
+
+ </listitem></varlistentry>
+
+ <varlistentry>
+ <term><emphasis>Gadget Driver</emphasis></term>
+
+ <listitem>
+ <para>The lower boundary of this driver implements hardware-neutral
+ USB functions, using calls to the controller driver.
+ Because such hardware varies widely in capabilities and restrictions,
+ and is used in embedded environments where space is at a premium,
+ the gadget driver is often configured at compile time
+ to work with endpoints supported by one particular controller.
+ Gadget drivers may be portable to several different controllers,
+ using conditional compilation.
+ (Recent kernels substantially simplify the work involved in
+ supporting new hardware, by <emphasis>autoconfiguring</emphasis>
+ endpoints automatically for many bulk-oriented drivers.)
+ Gadget driver responsibilities include:
+ </para>
+ <itemizedlist>
+ <listitem><para>handling setup requests (ep0 protocol responses)
+ possibly including class-specific functionality
+ </para></listitem>
+ <listitem><para>returning configuration and string descriptors
+ </para></listitem>
+ <listitem><para>(re)setting configurations and interface
+ altsettings, including enabling and configuring endpoints
+ </para></listitem>
+ <listitem><para>handling life cycle events, such as managing
+ bindings to hardware,
+ USB suspend/resume, remote wakeup,
+ and disconnection from the USB host.
+ </para></listitem>
+ <listitem><para>managing IN and OUT transfers on all currently
+ enabled endpoints
+ </para></listitem>
+ </itemizedlist>
+
+ <para>
+ Such drivers may be modules of proprietary code, although
+ that approach is discouraged in the Linux community.
+ </para>
+ </listitem></varlistentry>
+
+ <varlistentry>
+ <term><emphasis>Upper Level</emphasis></term>
+
+ <listitem>
+ <para>Most gadget drivers have an upper boundary that connects
+ to some Linux driver or framework in Linux.
+ Through that boundary flows the data which the gadget driver
+ produces and/or consumes through protocol transfers over USB.
+ Examples include:
+ </para>
+ <itemizedlist>
+ <listitem><para>user mode code, using generic (gadgetfs)
+ or application specific files in
+ <filename>/dev</filename>
+ </para></listitem>
+ <listitem><para>networking subsystem (for network gadgets,
+ like the CDC Ethernet Model gadget driver)
+ </para></listitem>
+ <listitem><para>data capture drivers, perhaps video4Linux or
+ a scanner driver; or test and measurement hardware.
+ </para></listitem>
+ <listitem><para>input subsystem (for HID gadgets)
+ </para></listitem>
+ <listitem><para>sound subsystem (for audio gadgets)
+ </para></listitem>
+ <listitem><para>file system (for PTP gadgets)
+ </para></listitem>
+ <listitem><para>block i/o subsystem (for usb-storage gadgets)
+ </para></listitem>
+ <listitem><para>... and more </para></listitem>
+ </itemizedlist>
+ </listitem></varlistentry>
+
+ <varlistentry>
+ <term><emphasis>Additional Layers</emphasis></term>
+
+ <listitem>
+ <para>Other layers may exist.
+ These could include kernel layers, such as network protocol stacks,
+ as well as user mode applications building on standard POSIX
+ system call APIs such as
+ <emphasis>open()</emphasis>, <emphasis>close()</emphasis>,
+ <emphasis>read()</emphasis> and <emphasis>write()</emphasis>.
+ On newer systems, POSIX Async I/O calls may be an option.
+ Such user mode code will not necessarily be subject to
+ the GNU General Public License (GPL).
+ </para>
+ </listitem></varlistentry>
+
+
+</variablelist>
+
+<para>OTG-capable systems will also need to include a standard Linux-USB
+host side stack,
+with <emphasis>usbcore</emphasis>,
+one or more <emphasis>Host Controller Drivers</emphasis> (HCDs),
+<emphasis>USB Device Drivers</emphasis> to support
+the OTG "Targeted Peripheral List",
+and so forth.
+There will also be an <emphasis>OTG Controller Driver</emphasis>,
+which is visible to gadget and device driver developers only indirectly.
+That helps the host and device side USB controllers implement the
+two new OTG protocols (HNP and SRP).
+Roles switch (host to peripheral, or vice versa) using HNP
+during USB suspend processing, and SRP can be viewed as a
+more battery-friendly kind of device wakeup protocol.
+</para>
+
+<para>Over time, reusable utilities are evolving to help make some
+gadget driver tasks simpler.
+For example, building configuration descriptors from vectors of
+descriptors for the configurations interfaces and endpoints is
+now automated, and many drivers now use autoconfiguration to
+choose hardware endpoints and initialize their descriptors.
+
+A potential example of particular interest
+is code implementing standard USB-IF protocols for
+HID, networking, storage, or audio classes.
+Some developers are interested in KDB or KGDB hooks, to let
+target hardware be remotely debugged.
+Most such USB protocol code doesn't need to be hardware-specific,
+any more than network protocols like X11, HTTP, or NFS are.
+Such gadget-side interface drivers should eventually be combined,
+to implement composite devices.
+</para>
+
+</chapter>
+
+
+<chapter id="api"><title>Kernel Mode Gadget API</title>
+
+<para>Gadget drivers declare themselves through a
+<emphasis>struct usb_gadget_driver</emphasis>, which is responsible for
+most parts of enumeration for a <emphasis>struct usb_gadget</emphasis>.
+The response to a set_configuration usually involves
+enabling one or more of the <emphasis>struct usb_ep</emphasis> objects
+exposed by the gadget, and submitting one or more
+<emphasis>struct usb_request</emphasis> buffers to transfer data.
+Understand those four data types, and their operations, and
+you will understand how this API works.
+</para>
+
+<note><title>Incomplete Data Type Descriptions</title>
+
+<para>This documentation was prepared using the standard Linux
+kernel <filename>docproc</filename> tool, which turns text
+and in-code comments into SGML DocBook and then into usable
+formats such as HTML or PDF.
+Other than the "Chapter 9" data types, most of the significant
+data types and functions are described here.
+</para>
+
+<para>However, docproc does not understand all the C constructs
+that are used, so some relevant information is likely omitted from
+what you are reading.
+One example of such information is endpoint autoconfiguration.
+You'll have to read the header file, and use example source
+code (such as that for "Gadget Zero"), to fully understand the API.
+</para>
+
+<para>The part of the API implementing some basic
+driver capabilities is specific to the version of the
+Linux kernel that's in use.
+The 2.6 kernel includes a <emphasis>driver model</emphasis>
+framework that has no analogue on earlier kernels;
+so those parts of the gadget API are not fully portable.
+(They are implemented on 2.4 kernels, but in a different way.)
+The driver model state is another part of this API that is
+ignored by the kerneldoc tools.
+</para>
+</note>
+
+<para>The core API does not expose
+every possible hardware feature, only the most widely available ones.
+There are significant hardware features, such as device-to-device DMA
+(without temporary storage in a memory buffer)
+that would be added using hardware-specific APIs.
+</para>
+
+<para>This API allows drivers to use conditional compilation to handle
+endpoint capabilities of different hardware, but doesn't require that.
+Hardware tends to have arbitrary restrictions, relating to
+transfer types, addressing, packet sizes, buffering, and availability.
+As a rule, such differences only matter for "endpoint zero" logic
+that handles device configuration and management.
+The API supports limited run-time
+detection of capabilities, through naming conventions for endpoints.
+Many drivers will be able to at least partially autoconfigure
+themselves.
+In particular, driver init sections will often have endpoint
+autoconfiguration logic that scans the hardware's list of endpoints
+to find ones matching the driver requirements
+(relying on those conventions), to eliminate some of the most
+common reasons for conditional compilation.
+</para>
+
+<para>Like the Linux-USB host side API, this API exposes
+the "chunky" nature of USB messages: I/O requests are in terms
+of one or more "packets", and packet boundaries are visible to drivers.
+Compared to RS-232 serial protocols, USB resembles
+synchronous protocols like HDLC
+(N bytes per frame, multipoint addressing, host as the primary
+station and devices as secondary stations)
+more than asynchronous ones
+(tty style: 8 data bits per frame, no parity, one stop bit).
+So for example the controller drivers won't buffer
+two single byte writes into a single two-byte USB IN packet,
+although gadget drivers may do so when they implement
+protocols where packet boundaries (and "short packets")
+are not significant.
+</para>
+
+<sect1 id="lifecycle"><title>Driver Life Cycle</title>
+
+<para>Gadget drivers make endpoint I/O requests to hardware without
+needing to know many details of the hardware, but driver
+setup/configuration code needs to handle some differences.
+Use the API like this:
+</para>
+
+<orderedlist numeration='arabic'>
+
+<listitem><para>Register a driver for the particular device side
+usb controller hardware,
+such as the net2280 on PCI (USB 2.0),
+sa11x0 or pxa25x as found in Linux PDAs,
+and so on.
+At this point the device is logically in the USB ch9 initial state
+("attached"), drawing no power and not usable
+(since it does not yet support enumeration).
+Any host should not see the device, since it's not
+activated the data line pullup used by the host to
+detect a device, even if VBUS power is available.
+</para></listitem>
+
+<listitem><para>Register a gadget driver that implements some higher level
+device function. That will then bind() to a usb_gadget, which
+activates the data line pullup sometime after detecting VBUS.
+</para></listitem>
+
+<listitem><para>The hardware driver can now start enumerating.
+The steps it handles are to accept USB power and set_address requests.
+Other steps are handled by the gadget driver.
+If the gadget driver module is unloaded before the host starts to
+enumerate, steps before step 7 are skipped.
+</para></listitem>
+
+<listitem><para>The gadget driver's setup() call returns usb descriptors,
+based both on what the bus interface hardware provides and on the
+functionality being implemented.
+That can involve alternate settings or configurations,
+unless the hardware prevents such operation.
+For OTG devices, each configuration descriptor includes
+an OTG descriptor.
+</para></listitem>
+
+<listitem><para>The gadget driver handles the last step of enumeration,
+when the USB host issues a set_configuration call.
+It enables all endpoints used in that configuration,
+with all interfaces in their default settings.
+That involves using a list of the hardware's endpoints, enabling each
+endpoint according to its descriptor.
+It may also involve using <function>usb_gadget_vbus_draw</function>
+to let more power be drawn from VBUS, as allowed by that configuration.
+For OTG devices, setting a configuration may also involve reporting
+HNP capabilities through a user interface.
+</para></listitem>
+
+<listitem><para>Do real work and perform data transfers, possibly involving
+changes to interface settings or switching to new configurations, until the
+device is disconnect()ed from the host.
+Queue any number of transfer requests to each endpoint.
+It may be suspended and resumed several times before being disconnected.
+On disconnect, the drivers go back to step 3 (above).
+</para></listitem>
+
+<listitem><para>When the gadget driver module is being unloaded,
+the driver unbind() callback is issued. That lets the controller
+driver be unloaded.
+</para></listitem>
+
+</orderedlist>
+
+<para>Drivers will normally be arranged so that just loading the
+gadget driver module (or statically linking it into a Linux kernel)
+allows the peripheral device to be enumerated, but some drivers
+will defer enumeration until some higher level component (like
+a user mode daemon) enables it.
+Note that at this lowest level there are no policies about how
+ep0 configuration logic is implemented,
+except that it should obey USB specifications.
+Such issues are in the domain of gadget drivers,
+including knowing about implementation constraints
+imposed by some USB controllers
+or understanding that composite devices might happen to
+be built by integrating reusable components.
+</para>
+
+<para>Note that the lifecycle above can be slightly different
+for OTG devices.
+Other than providing an additional OTG descriptor in each
+configuration, only the HNP-related differences are particularly
+visible to driver code.
+They involve reporting requirements during the SET_CONFIGURATION
+request, and the option to invoke HNP during some suspend callbacks.
+Also, SRP changes the semantics of
+<function>usb_gadget_wakeup</function>
+slightly.
+</para>
+
+</sect1>
+
+<sect1 id="ch9"><title>USB 2.0 Chapter 9 Types and Constants</title>
+
+<para>Gadget drivers
+rely on common USB structures and constants
+defined in the
+<filename>&lt;linux/usb/ch9.h&gt;</filename>
+header file, which is standard in Linux 2.6 kernels.
+These are the same types and constants used by host
+side drivers (and usbcore).
+</para>
+
+!Iinclude/linux/usb/ch9.h
+</sect1>
+
+<sect1 id="core"><title>Core Objects and Methods</title>
+
+<para>These are declared in
+<filename>&lt;linux/usb/gadget.h&gt;</filename>,
+and are used by gadget drivers to interact with
+USB peripheral controller drivers.
+</para>
+
+ <!-- yeech, this is ugly in nsgmls PDF output.
+
+ the PDF bookmark and refentry output nesting is wrong,
+ and the member/argument documentation indents ugly.
+
+ plus something (docproc?) adds whitespace before the
+ descriptive paragraph text, so it can't line up right
+ unless the explanations are trivial.
+ -->
+
+!Iinclude/linux/usb/gadget.h
+</sect1>
+
+<sect1 id="utils"><title>Optional Utilities</title>
+
+<para>The core API is sufficient for writing a USB Gadget Driver,
+but some optional utilities are provided to simplify common tasks.
+These utilities include endpoint autoconfiguration.
+</para>
+
+!Edrivers/usb/gadget/usbstring.c
+!Edrivers/usb/gadget/config.c
+<!-- !Edrivers/usb/gadget/epautoconf.c -->
+</sect1>
+
+<sect1 id="composite"><title>Composite Device Framework</title>
+
+<para>The core API is sufficient for writing drivers for composite
+USB devices (with more than one function in a given configuration),
+and also multi-configuration devices (also more than one function,
+but not necessarily sharing a given configuration).
+There is however an optional framework which makes it easier to
+reuse and combine functions.
+</para>
+
+<para>Devices using this framework provide a <emphasis>struct
+usb_composite_driver</emphasis>, which in turn provides one or
+more <emphasis>struct usb_configuration</emphasis> instances.
+Each such configuration includes at least one
+<emphasis>struct usb_function</emphasis>, which packages a user
+visible role such as "network link" or "mass storage device".
+Management functions may also exist, such as "Device Firmware
+Upgrade".
+</para>
+
+!Iinclude/linux/usb/composite.h
+!Edrivers/usb/gadget/composite.c
+
+</sect1>
+
+<sect1 id="functions"><title>Composite Device Functions</title>
+
+<para>At this writing, a few of the current gadget drivers have
+been converted to this framework.
+Near-term plans include converting all of them, except for "gadgetfs".
+</para>
+
+!Edrivers/usb/gadget/f_acm.c
+!Edrivers/usb/gadget/f_ecm.c
+!Edrivers/usb/gadget/f_subset.c
+!Edrivers/usb/gadget/f_obex.c
+!Edrivers/usb/gadget/f_serial.c
+
+</sect1>
+
+
+</chapter>
+
+<chapter id="controllers"><title>Peripheral Controller Drivers</title>
+
+<para>The first hardware supporting this API was the NetChip 2280
+controller, which supports USB 2.0 high speed and is based on PCI.
+This is the <filename>net2280</filename> driver module.
+The driver supports Linux kernel versions 2.4 and 2.6;
+contact NetChip Technologies for development boards and product
+information.
+</para>
+
+<para>Other hardware working in the "gadget" framework includes:
+Intel's PXA 25x and IXP42x series processors
+(<filename>pxa2xx_udc</filename>),
+Toshiba TC86c001 "Goku-S" (<filename>goku_udc</filename>),
+Renesas SH7705/7727 (<filename>sh_udc</filename>),
+MediaQ 11xx (<filename>mq11xx_udc</filename>),
+Hynix HMS30C7202 (<filename>h7202_udc</filename>),
+National 9303/4 (<filename>n9604_udc</filename>),
+Texas Instruments OMAP (<filename>omap_udc</filename>),
+Sharp LH7A40x (<filename>lh7a40x_udc</filename>),
+and more.
+Most of those are full speed controllers.
+</para>
+
+<para>At this writing, there are people at work on drivers in
+this framework for several other USB device controllers,
+with plans to make many of them be widely available.
+</para>
+
+<!-- !Edrivers/usb/gadget/net2280.c -->
+
+<para>A partial USB simulator,
+the <filename>dummy_hcd</filename> driver, is available.
+It can act like a net2280, a pxa25x, or an sa11x0 in terms
+of available endpoints and device speeds; and it simulates
+control, bulk, and to some extent interrupt transfers.
+That lets you develop some parts of a gadget driver on a normal PC,
+without any special hardware, and perhaps with the assistance
+of tools such as GDB running with User Mode Linux.
+At least one person has expressed interest in adapting that
+approach, hooking it up to a simulator for a microcontroller.
+Such simulators can help debug subsystems where the runtime hardware
+is unfriendly to software development, or is not yet available.
+</para>
+
+<para>Support for other controllers is expected to be developed
+and contributed
+over time, as this driver framework evolves.
+</para>
+
+</chapter>
+
+<chapter id="gadget"><title>Gadget Drivers</title>
+
+<para>In addition to <emphasis>Gadget Zero</emphasis>
+(used primarily for testing and development with drivers
+for usb controller hardware), other gadget drivers exist.
+</para>
+
+<para>There's an <emphasis>ethernet</emphasis> gadget
+driver, which implements one of the most useful
+<emphasis>Communications Device Class</emphasis> (CDC) models.
+One of the standards for cable modem interoperability even
+specifies the use of this ethernet model as one of two
+mandatory options.
+Gadgets using this code look to a USB host as if they're
+an Ethernet adapter.
+It provides access to a network where the gadget's CPU is one host,
+which could easily be bridging, routing, or firewalling
+access to other networks.
+Since some hardware can't fully implement the CDC Ethernet
+requirements, this driver also implements a "good parts only"
+subset of CDC Ethernet.
+(That subset doesn't advertise itself as CDC Ethernet,
+to avoid creating problems.)
+</para>
+
+<para>Support for Microsoft's <emphasis>RNDIS</emphasis>
+protocol has been contributed by Pengutronix and Auerswald GmbH.
+This is like CDC Ethernet, but it runs on more slightly USB hardware
+(but less than the CDC subset).
+However, its main claim to fame is being able to connect directly to
+recent versions of Windows, using drivers that Microsoft bundles
+and supports, making it much simpler to network with Windows.
+</para>
+
+<para>There is also support for user mode gadget drivers,
+using <emphasis>gadgetfs</emphasis>.
+This provides a <emphasis>User Mode API</emphasis> that presents
+each endpoint as a single file descriptor. I/O is done using
+normal <emphasis>read()</emphasis> and <emphasis>read()</emphasis> calls.
+Familiar tools like GDB and pthreads can be used to
+develop and debug user mode drivers, so that once a robust
+controller driver is available many applications for it
+won't require new kernel mode software.
+Linux 2.6 <emphasis>Async I/O (AIO)</emphasis>
+support is available, so that user mode software
+can stream data with only slightly more overhead
+than a kernel driver.
+</para>
+
+<para>There's a USB Mass Storage class driver, which provides
+a different solution for interoperability with systems such
+as MS-Windows and MacOS.
+That <emphasis>Mass Storage</emphasis> driver uses a
+file or block device as backing store for a drive,
+like the <filename>loop</filename> driver.
+The USB host uses the BBB, CB, or CBI versions of the mass
+storage class specification, using transparent SCSI commands
+to access the data from the backing store.
+</para>
+
+<para>There's a "serial line" driver, useful for TTY style
+operation over USB.
+The latest version of that driver supports CDC ACM style
+operation, like a USB modem, and so on most hardware it can
+interoperate easily with MS-Windows.
+One interesting use of that driver is in boot firmware (like a BIOS),
+which can sometimes use that model with very small systems without
+real serial lines.
+</para>
+
+<para>Support for other kinds of gadget is expected to
+be developed and contributed
+over time, as this driver framework evolves.
+</para>
+
+</chapter>
+
+<chapter id="otg"><title>USB On-The-GO (OTG)</title>
+
+<para>USB OTG support on Linux 2.6 was initially developed
+by Texas Instruments for
+<ulink url="http://www.omap.com">OMAP</ulink> 16xx and 17xx
+series processors.
+Other OTG systems should work in similar ways, but the
+hardware level details could be very different.
+</para>
+
+<para>Systems need specialized hardware support to implement OTG,
+notably including a special <emphasis>Mini-AB</emphasis> jack
+and associated transciever to support <emphasis>Dual-Role</emphasis>
+operation:
+they can act either as a host, using the standard
+Linux-USB host side driver stack,
+or as a peripheral, using this "gadget" framework.
+To do that, the system software relies on small additions
+to those programming interfaces,
+and on a new internal component (here called an "OTG Controller")
+affecting which driver stack connects to the OTG port.
+In each role, the system can re-use the existing pool of
+hardware-neutral drivers, layered on top of the controller
+driver interfaces (<emphasis>usb_bus</emphasis> or
+<emphasis>usb_gadget</emphasis>).
+Such drivers need at most minor changes, and most of the calls
+added to support OTG can also benefit non-OTG products.
+</para>
+
+<itemizedlist>
+ <listitem><para>Gadget drivers test the <emphasis>is_otg</emphasis>
+ flag, and use it to determine whether or not to include
+ an OTG descriptor in each of their configurations.
+ </para></listitem>
+ <listitem><para>Gadget drivers may need changes to support the
+ two new OTG protocols, exposed in new gadget attributes
+ such as <emphasis>b_hnp_enable</emphasis> flag.
+ HNP support should be reported through a user interface
+ (two LEDs could suffice), and is triggered in some cases
+ when the host suspends the peripheral.
+ SRP support can be user-initiated just like remote wakeup,
+ probably by pressing the same button.
+ </para></listitem>
+ <listitem><para>On the host side, USB device drivers need
+ to be taught to trigger HNP at appropriate moments, using
+ <function>usb_suspend_device()</function>.
+ That also conserves battery power, which is useful even
+ for non-OTG configurations.
+ </para></listitem>
+ <listitem><para>Also on the host side, a driver must support the
+ OTG "Targeted Peripheral List". That's just a whitelist,
+ used to reject peripherals not supported with a given
+ Linux OTG host.
+ <emphasis>This whitelist is product-specific;
+ each product must modify <filename>otg_whitelist.h</filename>
+ to match its interoperability specification.
+ </emphasis>
+ </para>
+ <para>Non-OTG Linux hosts, like PCs and workstations,
+ normally have some solution for adding drivers, so that
+ peripherals that aren't recognized can eventually be supported.
+ That approach is unreasonable for consumer products that may
+ never have their firmware upgraded, and where it's usually
+ unrealistic to expect traditional PC/workstation/server kinds
+ of support model to work.
+ For example, it's often impractical to change device firmware
+ once the product has been distributed, so driver bugs can't
+ normally be fixed if they're found after shipment.
+ </para></listitem>
+</itemizedlist>
+
+<para>
+Additional changes are needed below those hardware-neutral
+<emphasis>usb_bus</emphasis> and <emphasis>usb_gadget</emphasis>
+driver interfaces; those aren't discussed here in any detail.
+Those affect the hardware-specific code for each USB Host or Peripheral
+controller, and how the HCD initializes (since OTG can be active only
+on a single port).
+They also involve what may be called an <emphasis>OTG Controller
+Driver</emphasis>, managing the OTG transceiver and the OTG state
+machine logic as well as much of the root hub behavior for the
+OTG port.
+The OTG controller driver needs to activate and deactivate USB
+controllers depending on the relevant device role.
+Some related changes were needed inside usbcore, so that it
+can identify OTG-capable devices and respond appropriately
+to HNP or SRP protocols.
+</para>
+
+</chapter>
+
+</book>
+<!--
+ vim:syntax=sgml:sw=4
+-->
diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl
new file mode 100644
index 00000000..b3422341
--- /dev/null
+++ b/Documentation/DocBook/genericirq.tmpl
@@ -0,0 +1,507 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Generic-IRQ-Guide">
+ <bookinfo>
+ <title>Linux generic IRQ handling</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Thomas</firstname>
+ <surname>Gleixner</surname>
+ <affiliation>
+ <address>
+ <email>tglx@linutronix.de</email>
+ </address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Ingo</firstname>
+ <surname>Molnar</surname>
+ <affiliation>
+ <address>
+ <email>mingo@elte.hu</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2005-2010</year>
+ <holder>Thomas Gleixner</holder>
+ </copyright>
+ <copyright>
+ <year>2005-2006</year>
+ <holder>Ingo Molnar</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ The generic interrupt handling layer is designed to provide a
+ complete abstraction of interrupt handling for device drivers.
+ It is able to handle all the different types of interrupt controller
+ hardware. Device drivers use generic API functions to request, enable,
+ disable and free interrupts. The drivers do not have to know anything
+ about interrupt hardware details, so they can be used on different
+ platforms without code changes.
+ </para>
+ <para>
+ This documentation is provided to developers who want to implement
+ an interrupt subsystem based for their architecture, with the help
+ of the generic IRQ handling layer.
+ </para>
+ </chapter>
+
+ <chapter id="rationale">
+ <title>Rationale</title>
+ <para>
+ The original implementation of interrupt handling in Linux is using
+ the __do_IRQ() super-handler, which is able to deal with every
+ type of interrupt logic.
+ </para>
+ <para>
+ Originally, Russell King identified different types of handlers to
+ build a quite universal set for the ARM interrupt handler
+ implementation in Linux 2.5/2.6. He distinguished between:
+ <itemizedlist>
+ <listitem><para>Level type</para></listitem>
+ <listitem><para>Edge type</para></listitem>
+ <listitem><para>Simple type</para></listitem>
+ </itemizedlist>
+ During the implementation we identified another type:
+ <itemizedlist>
+ <listitem><para>Fast EOI type</para></listitem>
+ </itemizedlist>
+ In the SMP world of the __do_IRQ() super-handler another type
+ was identified:
+ <itemizedlist>
+ <listitem><para>Per CPU type</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ This split implementation of highlevel IRQ handlers allows us to
+ optimize the flow of the interrupt handling for each specific
+ interrupt type. This reduces complexity in that particular codepath
+ and allows the optimized handling of a given type.
+ </para>
+ <para>
+ The original general IRQ implementation used hw_interrupt_type
+ structures and their ->ack(), ->end() [etc.] callbacks to
+ differentiate the flow control in the super-handler. This leads to
+ a mix of flow logic and lowlevel hardware logic, and it also leads
+ to unnecessary code duplication: for example in i386, there is a
+ ioapic_level_irq and a ioapic_edge_irq irq-type which share many
+ of the lowlevel details but have different flow handling.
+ </para>
+ <para>
+ A more natural abstraction is the clean separation of the
+ 'irq flow' and the 'chip details'.
+ </para>
+ <para>
+ Analysing a couple of architecture's IRQ subsystem implementations
+ reveals that most of them can use a generic set of 'irq flow'
+ methods and only need to add the chip level specific code.
+ The separation is also valuable for (sub)architectures
+ which need specific quirks in the irq flow itself but not in the
+ chip-details - and thus provides a more transparent IRQ subsystem
+ design.
+ </para>
+ <para>
+ Each interrupt descriptor is assigned its own highlevel flow
+ handler, which is normally one of the generic
+ implementations. (This highlevel flow handler implementation also
+ makes it simple to provide demultiplexing handlers which can be
+ found in embedded platforms on various architectures.)
+ </para>
+ <para>
+ The separation makes the generic interrupt handling layer more
+ flexible and extensible. For example, an (sub)architecture can
+ use a generic irq-flow implementation for 'level type' interrupts
+ and add a (sub)architecture specific 'edge type' implementation.
+ </para>
+ <para>
+ To make the transition to the new model easier and prevent the
+ breakage of existing implementations, the __do_IRQ() super-handler
+ is still available. This leads to a kind of duality for the time
+ being. Over time the new model should be used in more and more
+ architectures, as it enables smaller and cleaner IRQ subsystems.
+ It's deprecated for three years now and about to be removed.
+ </para>
+ </chapter>
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ None (knock on wood).
+ </para>
+ </chapter>
+
+ <chapter id="Abstraction">
+ <title>Abstraction layers</title>
+ <para>
+ There are three main levels of abstraction in the interrupt code:
+ <orderedlist>
+ <listitem><para>Highlevel driver API</para></listitem>
+ <listitem><para>Highlevel IRQ flow handlers</para></listitem>
+ <listitem><para>Chiplevel hardware encapsulation</para></listitem>
+ </orderedlist>
+ </para>
+ <sect1 id="Interrupt_control_flow">
+ <title>Interrupt control flow</title>
+ <para>
+ Each interrupt is described by an interrupt descriptor structure
+ irq_desc. The interrupt is referenced by an 'unsigned int' numeric
+ value which selects the corresponding interrupt decription structure
+ in the descriptor structures array.
+ The descriptor structure contains status information and pointers
+ to the interrupt flow method and the interrupt chip structure
+ which are assigned to this interrupt.
+ </para>
+ <para>
+ Whenever an interrupt triggers, the lowlevel arch code calls into
+ the generic interrupt code by calling desc->handle_irq().
+ This highlevel IRQ handling function only uses desc->irq_data.chip
+ primitives referenced by the assigned chip descriptor structure.
+ </para>
+ </sect1>
+ <sect1 id="Highlevel_Driver_API">
+ <title>Highlevel Driver API</title>
+ <para>
+ The highlevel Driver API consists of following functions:
+ <itemizedlist>
+ <listitem><para>request_irq()</para></listitem>
+ <listitem><para>free_irq()</para></listitem>
+ <listitem><para>disable_irq()</para></listitem>
+ <listitem><para>enable_irq()</para></listitem>
+ <listitem><para>disable_irq_nosync() (SMP only)</para></listitem>
+ <listitem><para>synchronize_irq() (SMP only)</para></listitem>
+ <listitem><para>irq_set_irq_type()</para></listitem>
+ <listitem><para>irq_set_irq_wake()</para></listitem>
+ <listitem><para>irq_set_handler_data()</para></listitem>
+ <listitem><para>irq_set_chip()</para></listitem>
+ <listitem><para>irq_set_chip_data()</para></listitem>
+ </itemizedlist>
+ See the autogenerated function documentation for details.
+ </para>
+ </sect1>
+ <sect1 id="Highlevel_IRQ_flow_handlers">
+ <title>Highlevel IRQ flow handlers</title>
+ <para>
+ The generic layer provides a set of pre-defined irq-flow methods:
+ <itemizedlist>
+ <listitem><para>handle_level_irq</para></listitem>
+ <listitem><para>handle_edge_irq</para></listitem>
+ <listitem><para>handle_fasteoi_irq</para></listitem>
+ <listitem><para>handle_simple_irq</para></listitem>
+ <listitem><para>handle_percpu_irq</para></listitem>
+ <listitem><para>handle_edge_eoi_irq</para></listitem>
+ <listitem><para>handle_bad_irq</para></listitem>
+ </itemizedlist>
+ The interrupt flow handlers (either predefined or architecture
+ specific) are assigned to specific interrupts by the architecture
+ either during bootup or during device initialization.
+ </para>
+ <sect2 id="Default_flow_implementations">
+ <title>Default flow implementations</title>
+ <sect3 id="Helper_functions">
+ <title>Helper functions</title>
+ <para>
+ The helper functions call the chip primitives and
+ are used by the default flow implementations.
+ The following helper functions are implemented (simplified excerpt):
+ <programlisting>
+default_enable(struct irq_data *data)
+{
+ desc->irq_data.chip->irq_unmask(data);
+}
+
+default_disable(struct irq_data *data)
+{
+ if (!delay_disable(data))
+ desc->irq_data.chip->irq_mask(data);
+}
+
+default_ack(struct irq_data *data)
+{
+ chip->irq_ack(data);
+}
+
+default_mask_ack(struct irq_data *data)
+{
+ if (chip->irq_mask_ack) {
+ chip->irq_mask_ack(data);
+ } else {
+ chip->irq_mask(data);
+ chip->irq_ack(data);
+ }
+}
+
+noop(struct irq_data *data))
+{
+}
+
+ </programlisting>
+ </para>
+ </sect3>
+ </sect2>
+ <sect2 id="Default_flow_handler_implementations">
+ <title>Default flow handler implementations</title>
+ <sect3 id="Default_Level_IRQ_flow_handler">
+ <title>Default Level IRQ flow handler</title>
+ <para>
+ handle_level_irq provides a generic implementation
+ for level-triggered interrupts.
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+desc->irq_data.chip->irq_mask_ack();
+handle_irq_event(desc->action);
+desc->irq_data.chip->irq_unmask();
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="Default_FASTEOI_IRQ_flow_handler">
+ <title>Default Fast EOI IRQ flow handler</title>
+ <para>
+ handle_fasteoi_irq provides a generic implementation
+ for interrupts, which only need an EOI at the end of
+ the handler
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+handle_irq_event(desc->action);
+desc->irq_data.chip->irq_eoi();
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="Default_Edge_IRQ_flow_handler">
+ <title>Default Edge IRQ flow handler</title>
+ <para>
+ handle_edge_irq provides a generic implementation
+ for edge-triggered interrupts.
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+if (desc->status &amp; running) {
+ desc->irq_data.chip->irq_mask_ack();
+ desc->status |= pending | masked;
+ return;
+}
+desc->irq_data.chip->irq_ack();
+desc->status |= running;
+do {
+ if (desc->status &amp; masked)
+ desc->irq_data.chip->irq_unmask();
+ desc->status &amp;= ~pending;
+ handle_irq_event(desc->action);
+} while (status &amp; pending);
+desc->status &amp;= ~running;
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="Default_simple_IRQ_flow_handler">
+ <title>Default simple IRQ flow handler</title>
+ <para>
+ handle_simple_irq provides a generic implementation
+ for simple interrupts.
+ </para>
+ <para>
+ Note: The simple flow handler does not call any
+ handler/chip primitives.
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+handle_irq_event(desc->action);
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="Default_per_CPU_flow_handler">
+ <title>Default per CPU flow handler</title>
+ <para>
+ handle_percpu_irq provides a generic implementation
+ for per CPU interrupts.
+ </para>
+ <para>
+ Per CPU interrupts are only available on SMP and
+ the handler provides a simplified version without
+ locking.
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+if (desc->irq_data.chip->irq_ack)
+ desc->irq_data.chip->irq_ack();
+handle_irq_event(desc->action);
+if (desc->irq_data.chip->irq_eoi)
+ desc->irq_data.chip->irq_eoi();
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="EOI_Edge_IRQ_flow_handler">
+ <title>EOI Edge IRQ flow handler</title>
+ <para>
+ handle_edge_eoi_irq provides an abnomination of the edge
+ handler which is solely used to tame a badly wreckaged
+ irq controller on powerpc/cell.
+ </para>
+ </sect3>
+ <sect3 id="BAD_IRQ_flow_handler">
+ <title>Bad IRQ flow handler</title>
+ <para>
+ handle_bad_irq is used for spurious interrupts which
+ have no real handler assigned..
+ </para>
+ </sect3>
+ </sect2>
+ <sect2 id="Quirks_and_optimizations">
+ <title>Quirks and optimizations</title>
+ <para>
+ The generic functions are intended for 'clean' architectures and chips,
+ which have no platform-specific IRQ handling quirks. If an architecture
+ needs to implement quirks on the 'flow' level then it can do so by
+ overriding the highlevel irq-flow handler.
+ </para>
+ </sect2>
+ <sect2 id="Delayed_interrupt_disable">
+ <title>Delayed interrupt disable</title>
+ <para>
+ This per interrupt selectable feature, which was introduced by Russell
+ King in the ARM interrupt implementation, does not mask an interrupt
+ at the hardware level when disable_irq() is called. The interrupt is
+ kept enabled and is masked in the flow handler when an interrupt event
+ happens. This prevents losing edge interrupts on hardware which does
+ not store an edge interrupt event while the interrupt is disabled at
+ the hardware level. When an interrupt arrives while the IRQ_DISABLED
+ flag is set, then the interrupt is masked at the hardware level and
+ the IRQ_PENDING bit is set. When the interrupt is re-enabled by
+ enable_irq() the pending bit is checked and if it is set, the
+ interrupt is resent either via hardware or by a software resend
+ mechanism. (It's necessary to enable CONFIG_HARDIRQS_SW_RESEND when
+ you want to use the delayed interrupt disable feature and your
+ hardware is not capable of retriggering an interrupt.)
+ The delayed interrupt disable is not configurable.
+ </para>
+ </sect2>
+ </sect1>
+ <sect1 id="Chiplevel_hardware_encapsulation">
+ <title>Chiplevel hardware encapsulation</title>
+ <para>
+ The chip level hardware descriptor structure irq_chip
+ contains all the direct chip relevant functions, which
+ can be utilized by the irq flow implementations.
+ <itemizedlist>
+ <listitem><para>irq_ack()</para></listitem>
+ <listitem><para>irq_mask_ack() - Optional, recommended for performance</para></listitem>
+ <listitem><para>irq_mask()</para></listitem>
+ <listitem><para>irq_unmask()</para></listitem>
+ <listitem><para>irq_eoi() - Optional, required for eoi flow handlers</para></listitem>
+ <listitem><para>irq_retrigger() - Optional</para></listitem>
+ <listitem><para>irq_set_type() - Optional</para></listitem>
+ <listitem><para>irq_set_wake() - Optional</para></listitem>
+ </itemizedlist>
+ These primitives are strictly intended to mean what they say: ack means
+ ACK, masking means masking of an IRQ line, etc. It is up to the flow
+ handler(s) to use these basic units of lowlevel functionality.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="doirq">
+ <title>__do_IRQ entry point</title>
+ <para>
+ The original implementation __do_IRQ() was an alternative entry
+ point for all types of interrupts. It not longer exists.
+ </para>
+ <para>
+ This handler turned out to be not suitable for all
+ interrupt hardware and was therefore reimplemented with split
+ functionality for edge/level/simple/percpu interrupts. This is not
+ only a functional optimization. It also shortens code paths for
+ interrupts.
+ </para>
+ </chapter>
+
+ <chapter id="locking">
+ <title>Locking on SMP</title>
+ <para>
+ The locking of chip registers is up to the architecture that
+ defines the chip primitives. The per-irq structure is
+ protected via desc->lock, by the generic layer.
+ </para>
+ </chapter>
+ <chapter id="structs">
+ <title>Structures</title>
+ <para>
+ This chapter contains the autogenerated documentation of the structures which are
+ used in the generic IRQ layer.
+ </para>
+!Iinclude/linux/irq.h
+!Iinclude/linux/interrupt.h
+ </chapter>
+
+ <chapter id="pubfunctions">
+ <title>Public Functions Provided</title>
+ <para>
+ This chapter contains the autogenerated documentation of the kernel API functions
+ which are exported.
+ </para>
+!Ekernel/irq/manage.c
+!Ekernel/irq/chip.c
+ </chapter>
+
+ <chapter id="intfunctions">
+ <title>Internal Functions Provided</title>
+ <para>
+ This chapter contains the autogenerated documentation of the internal functions.
+ </para>
+!Ikernel/irq/irqdesc.c
+!Ikernel/irq/handle.c
+!Ikernel/irq/chip.c
+ </chapter>
+
+ <chapter id="credits">
+ <title>Credits</title>
+ <para>
+ The following people have contributed to this document:
+ <orderedlist>
+ <listitem><para>Thomas Gleixner<email>tglx@linutronix.de</email></para></listitem>
+ <listitem><para>Ingo Molnar<email>mingo@elte.hu</email></para></listitem>
+ </orderedlist>
+ </para>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
new file mode 100644
index 00000000..f75ab4c1
--- /dev/null
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -0,0 +1,330 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxKernelAPI">
+ <bookinfo>
+ <title>The Linux Kernel API</title>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="adt">
+ <title>Data Types</title>
+ <sect1><title>Doubly Linked Lists</title>
+!Iinclude/linux/list.h
+ </sect1>
+ </chapter>
+
+ <chapter id="libc">
+ <title>Basic C Library Functions</title>
+
+ <para>
+ When writing drivers, you cannot in general use routines which are
+ from the C Library. Some of the functions have been found generally
+ useful and they are listed below. The behaviour of these functions
+ may vary slightly from those defined by ANSI, and these deviations
+ are noted in the text.
+ </para>
+
+ <sect1><title>String Conversions</title>
+!Elib/vsprintf.c
+!Finclude/linux/kernel.h kstrtol
+!Finclude/linux/kernel.h kstrtoul
+!Elib/kstrtox.c
+ </sect1>
+ <sect1><title>String Manipulation</title>
+<!-- All functions are exported at now
+X!Ilib/string.c
+ -->
+!Elib/string.c
+ </sect1>
+ <sect1><title>Bit Operations</title>
+!Iarch/x86/include/asm/bitops.h
+ </sect1>
+ </chapter>
+
+ <chapter id="kernel-lib">
+ <title>Basic Kernel Library Functions</title>
+
+ <para>
+ The Linux kernel provides more basic utility functions.
+ </para>
+
+ <sect1><title>Bitmap Operations</title>
+!Elib/bitmap.c
+!Ilib/bitmap.c
+ </sect1>
+
+ <sect1><title>Command-line Parsing</title>
+!Elib/cmdline.c
+ </sect1>
+
+ <sect1 id="crc"><title>CRC Functions</title>
+!Elib/crc7.c
+!Elib/crc16.c
+!Elib/crc-itu-t.c
+!Elib/crc32.c
+!Elib/crc-ccitt.c
+ </sect1>
+
+ <sect1 id="idr"><title>idr/ida Functions</title>
+!Pinclude/linux/idr.h idr sync
+!Plib/idr.c IDA description
+!Elib/idr.c
+ </sect1>
+ </chapter>
+
+ <chapter id="mm">
+ <title>Memory Management in Linux</title>
+ <sect1><title>The Slab Cache</title>
+!Iinclude/linux/slab.h
+!Emm/slab.c
+ </sect1>
+ <sect1><title>User Space Memory Access</title>
+!Iarch/x86/include/asm/uaccess_32.h
+!Earch/x86/lib/usercopy_32.c
+ </sect1>
+ <sect1><title>More Memory Management Functions</title>
+!Emm/readahead.c
+!Emm/filemap.c
+!Emm/memory.c
+!Emm/vmalloc.c
+!Imm/page_alloc.c
+!Emm/mempool.c
+!Emm/dmapool.c
+!Emm/page-writeback.c
+!Emm/truncate.c
+ </sect1>
+ </chapter>
+
+
+ <chapter id="ipc">
+ <title>Kernel IPC facilities</title>
+
+ <sect1><title>IPC utilities</title>
+!Iipc/util.c
+ </sect1>
+ </chapter>
+
+ <chapter id="kfifo">
+ <title>FIFO Buffer</title>
+ <sect1><title>kfifo interface</title>
+!Iinclude/linux/kfifo.h
+ </sect1>
+ </chapter>
+
+ <chapter id="relayfs">
+ <title>relay interface support</title>
+
+ <para>
+ Relay interface support
+ is designed to provide an efficient mechanism for tools and
+ facilities to relay large amounts of data from kernel space to
+ user space.
+ </para>
+
+ <sect1><title>relay interface</title>
+!Ekernel/relay.c
+!Ikernel/relay.c
+ </sect1>
+ </chapter>
+
+ <chapter id="modload">
+ <title>Module Support</title>
+ <sect1><title>Module Loading</title>
+!Ekernel/kmod.c
+ </sect1>
+ <sect1><title>Inter Module support</title>
+ <para>
+ Refer to the file kernel/module.c for more information.
+ </para>
+<!-- FIXME: Removed for now since no structured comments in source
+X!Ekernel/module.c
+-->
+ </sect1>
+ </chapter>
+
+ <chapter id="hardware">
+ <title>Hardware Interfaces</title>
+ <sect1><title>Interrupt Handling</title>
+!Ekernel/irq/manage.c
+ </sect1>
+
+ <sect1><title>DMA Channels</title>
+!Ekernel/dma.c
+ </sect1>
+
+ <sect1><title>Resources Management</title>
+!Ikernel/resource.c
+!Ekernel/resource.c
+ </sect1>
+
+ <sect1><title>MTRR Handling</title>
+!Earch/x86/kernel/cpu/mtrr/main.c
+ </sect1>
+
+ <sect1><title>PCI Support Library</title>
+!Edrivers/pci/pci.c
+!Edrivers/pci/pci-driver.c
+!Edrivers/pci/remove.c
+!Edrivers/pci/search.c
+!Edrivers/pci/msi.c
+!Edrivers/pci/bus.c
+!Edrivers/pci/access.c
+!Edrivers/pci/irq.c
+!Edrivers/pci/htirq.c
+<!-- FIXME: Removed for now since no structured comments in source
+X!Edrivers/pci/hotplug.c
+-->
+!Edrivers/pci/probe.c
+!Edrivers/pci/slot.c
+!Edrivers/pci/rom.c
+!Edrivers/pci/iov.c
+!Idrivers/pci/pci-sysfs.c
+ </sect1>
+ <sect1><title>PCI Hotplug Support Library</title>
+!Edrivers/pci/hotplug/pci_hotplug_core.c
+ </sect1>
+ </chapter>
+
+ <chapter id="firmware">
+ <title>Firmware Interfaces</title>
+ <sect1><title>DMI Interfaces</title>
+!Edrivers/firmware/dmi_scan.c
+ </sect1>
+ <sect1><title>EDD Interfaces</title>
+!Idrivers/firmware/edd.c
+ </sect1>
+ </chapter>
+
+ <chapter id="security">
+ <title>Security Framework</title>
+!Isecurity/security.c
+!Esecurity/inode.c
+ </chapter>
+
+ <chapter id="audit">
+ <title>Audit Interfaces</title>
+!Ekernel/audit.c
+!Ikernel/auditsc.c
+!Ikernel/auditfilter.c
+ </chapter>
+
+ <chapter id="accounting">
+ <title>Accounting Framework</title>
+!Ikernel/acct.c
+ </chapter>
+
+ <chapter id="blkdev">
+ <title>Block Devices</title>
+!Eblock/blk-core.c
+!Iblock/blk-core.c
+!Eblock/blk-map.c
+!Iblock/blk-sysfs.c
+!Eblock/blk-settings.c
+!Eblock/blk-exec.c
+!Eblock/blk-flush.c
+!Eblock/blk-lib.c
+!Eblock/blk-tag.c
+!Iblock/blk-tag.c
+!Eblock/blk-integrity.c
+!Ikernel/trace/blktrace.c
+!Iblock/genhd.c
+!Eblock/genhd.c
+ </chapter>
+
+ <chapter id="chrdev">
+ <title>Char devices</title>
+!Efs/char_dev.c
+ </chapter>
+
+ <chapter id="miscdev">
+ <title>Miscellaneous Devices</title>
+!Edrivers/char/misc.c
+ </chapter>
+
+ <chapter id="clk">
+ <title>Clock Framework</title>
+
+ <para>
+ The clock framework defines programming interfaces to support
+ software management of the system clock tree.
+ This framework is widely used with System-On-Chip (SOC) platforms
+ to support power management and various devices which may need
+ custom clock rates.
+ Note that these "clocks" don't relate to timekeeping or real
+ time clocks (RTCs), each of which have separate frameworks.
+ These <structname>struct clk</structname> instances may be used
+ to manage for example a 96 MHz signal that is used to shift bits
+ into and out of peripherals or busses, or otherwise trigger
+ synchronous state machine transitions in system hardware.
+ </para>
+
+ <para>
+ Power management is supported by explicit software clock gating:
+ unused clocks are disabled, so the system doesn't waste power
+ changing the state of transistors that aren't in active use.
+ On some systems this may be backed by hardware clock gating,
+ where clocks are gated without being disabled in software.
+ Sections of chips that are powered but not clocked may be able
+ to retain their last state.
+ This low power state is often called a <emphasis>retention
+ mode</emphasis>.
+ This mode still incurs leakage currents, especially with finer
+ circuit geometries, but for CMOS circuits power is mostly used
+ by clocked state changes.
+ </para>
+
+ <para>
+ Power-aware drivers only enable their clocks when the device
+ they manage is in active use. Also, system sleep states often
+ differ according to which clock domains are active: while a
+ "standby" state may allow wakeup from several active domains, a
+ "mem" (suspend-to-RAM) state may require a more wholesale shutdown
+ of clocks derived from higher speed PLLs and oscillators, limiting
+ the number of possible wakeup event sources. A driver's suspend
+ method may need to be aware of system-specific clock constraints
+ on the target sleep state.
+ </para>
+
+ <para>
+ Some platforms support programmable clock generators. These
+ can be used by external chips of various kinds, such as other
+ CPUs, multimedia codecs, and devices with strict requirements
+ for interface clocking.
+ </para>
+
+!Iinclude/linux/clk.h
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
new file mode 100644
index 00000000..d0758b24
--- /dev/null
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -0,0 +1,1320 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="lk-hacking-guide">
+ <bookinfo>
+ <title>Unreliable Guide To Hacking The Linux Kernel</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Rusty</firstname>
+ <surname>Russell</surname>
+ <affiliation>
+ <address>
+ <email>rusty@rustcorp.com.au</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2005</year>
+ <holder>Rusty Russell</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+
+ <releaseinfo>
+ This is the first release of this document as part of the kernel tarball.
+ </releaseinfo>
+
+ </bookinfo>
+
+ <toc></toc>
+
+ <chapter id="introduction">
+ <title>Introduction</title>
+ <para>
+ Welcome, gentle reader, to Rusty's Remarkably Unreliable Guide to Linux
+ Kernel Hacking. This document describes the common routines and
+ general requirements for kernel code: its goal is to serve as a
+ primer for Linux kernel development for experienced C
+ programmers. I avoid implementation details: that's what the
+ code is for, and I ignore whole tracts of useful routines.
+ </para>
+ <para>
+ Before you read this, please understand that I never wanted to
+ write this document, being grossly under-qualified, but I always
+ wanted to read it, and this was the only way. I hope it will
+ grow into a compendium of best practice, common starting points
+ and random information.
+ </para>
+ </chapter>
+
+ <chapter id="basic-players">
+ <title>The Players</title>
+
+ <para>
+ At any time each of the CPUs in a system can be:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ not associated with any process, serving a hardware interrupt;
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ not associated with any process, serving a softirq or tasklet;
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ running in kernel space, associated with a process (user context);
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ running a process in user space.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ There is an ordering between these. The bottom two can preempt
+ each other, but above that is a strict hierarchy: each can only be
+ preempted by the ones above it. For example, while a softirq is
+ running on a CPU, no other softirq will preempt it, but a hardware
+ interrupt can. However, any other CPUs in the system execute
+ independently.
+ </para>
+
+ <para>
+ We'll see a number of ways that the user context can block
+ interrupts, to become truly non-preemptable.
+ </para>
+
+ <sect1 id="basics-usercontext">
+ <title>User Context</title>
+
+ <para>
+ User context is when you are coming in from a system call or other
+ trap: like userspace, you can be preempted by more important tasks
+ and by interrupts. You can sleep, by calling
+ <function>schedule()</function>.
+ </para>
+
+ <note>
+ <para>
+ You are always in user context on module load and unload,
+ and on operations on the block device layer.
+ </para>
+ </note>
+
+ <para>
+ In user context, the <varname>current</varname> pointer (indicating
+ the task we are currently executing) is valid, and
+ <function>in_interrupt()</function>
+ (<filename>include/linux/interrupt.h</filename>) is <returnvalue>false
+ </returnvalue>.
+ </para>
+
+ <caution>
+ <para>
+ Beware that if you have preemption or softirqs disabled
+ (see below), <function>in_interrupt()</function> will return a
+ false positive.
+ </para>
+ </caution>
+ </sect1>
+
+ <sect1 id="basics-hardirqs">
+ <title>Hardware Interrupts (Hard IRQs)</title>
+
+ <para>
+ Timer ticks, <hardware>network cards</hardware> and
+ <hardware>keyboard</hardware> are examples of real
+ hardware which produce interrupts at any time. The kernel runs
+ interrupt handlers, which services the hardware. The kernel
+ guarantees that this handler is never re-entered: if the same
+ interrupt arrives, it is queued (or dropped). Because it
+ disables interrupts, this handler has to be fast: frequently it
+ simply acknowledges the interrupt, marks a 'software interrupt'
+ for execution and exits.
+ </para>
+
+ <para>
+ You can tell you are in a hardware interrupt, because
+ <function>in_irq()</function> returns <returnvalue>true</returnvalue>.
+ </para>
+ <caution>
+ <para>
+ Beware that this will return a false positive if interrupts are disabled
+ (see below).
+ </para>
+ </caution>
+ </sect1>
+
+ <sect1 id="basics-softirqs">
+ <title>Software Interrupt Context: Softirqs and Tasklets</title>
+
+ <para>
+ Whenever a system call is about to return to userspace, or a
+ hardware interrupt handler exits, any 'software interrupts'
+ which are marked pending (usually by hardware interrupts) are
+ run (<filename>kernel/softirq.c</filename>).
+ </para>
+
+ <para>
+ Much of the real interrupt handling work is done here. Early in
+ the transition to <acronym>SMP</acronym>, there were only 'bottom
+ halves' (BHs), which didn't take advantage of multiple CPUs. Shortly
+ after we switched from wind-up computers made of match-sticks and snot,
+ we abandoned this limitation and switched to 'softirqs'.
+ </para>
+
+ <para>
+ <filename class="headerfile">include/linux/interrupt.h</filename> lists the
+ different softirqs. A very important softirq is the
+ timer softirq (<filename
+ class="headerfile">include/linux/timer.h</filename>): you can
+ register to have it call functions for you in a given length of
+ time.
+ </para>
+
+ <para>
+ Softirqs are often a pain to deal with, since the same softirq
+ will run simultaneously on more than one CPU. For this reason,
+ tasklets (<filename
+ class="headerfile">include/linux/interrupt.h</filename>) are more
+ often used: they are dynamically-registrable (meaning you can have
+ as many as you want), and they also guarantee that any tasklet
+ will only run on one CPU at any time, although different tasklets
+ can run simultaneously.
+ </para>
+ <caution>
+ <para>
+ The name 'tasklet' is misleading: they have nothing to do with 'tasks',
+ and probably more to do with some bad vodka Alexey Kuznetsov had at the
+ time.
+ </para>
+ </caution>
+
+ <para>
+ You can tell you are in a softirq (or tasklet)
+ using the <function>in_softirq()</function> macro
+ (<filename class="headerfile">include/linux/interrupt.h</filename>).
+ </para>
+ <caution>
+ <para>
+ Beware that this will return a false positive if a bh lock (see below)
+ is held.
+ </para>
+ </caution>
+ </sect1>
+ </chapter>
+
+ <chapter id="basic-rules">
+ <title>Some Basic Rules</title>
+
+ <variablelist>
+ <varlistentry>
+ <term>No memory protection</term>
+ <listitem>
+ <para>
+ If you corrupt memory, whether in user context or
+ interrupt context, the whole machine will crash. Are you
+ sure you can't do what you want in userspace?
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>No floating point or <acronym>MMX</acronym></term>
+ <listitem>
+ <para>
+ The <acronym>FPU</acronym> context is not saved; even in user
+ context the <acronym>FPU</acronym> state probably won't
+ correspond with the current process: you would mess with some
+ user process' <acronym>FPU</acronym> state. If you really want
+ to do this, you would have to explicitly save/restore the full
+ <acronym>FPU</acronym> state (and avoid context switches). It
+ is generally a bad idea; use fixed point arithmetic first.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>A rigid stack limit</term>
+ <listitem>
+ <para>
+ Depending on configuration options the kernel stack is about 3K to 6K for most 32-bit architectures: it's
+ about 14K on most 64-bit archs, and often shared with interrupts
+ so you can't use it all. Avoid deep recursion and huge local
+ arrays on the stack (allocate them dynamically instead).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>The Linux kernel is portable</term>
+ <listitem>
+ <para>
+ Let's keep it that way. Your code should be 64-bit clean,
+ and endian-independent. You should also minimize CPU
+ specific stuff, e.g. inline assembly should be cleanly
+ encapsulated and minimized to ease porting. Generally it
+ should be restricted to the architecture-dependent part of
+ the kernel tree.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </chapter>
+
+ <chapter id="ioctls">
+ <title>ioctls: Not writing a new system call</title>
+
+ <para>
+ A system call generally looks like this
+ </para>
+
+ <programlisting>
+asmlinkage long sys_mycall(int arg)
+{
+ return 0;
+}
+ </programlisting>
+
+ <para>
+ First, in most cases you don't want to create a new system call.
+ You create a character device and implement an appropriate ioctl
+ for it. This is much more flexible than system calls, doesn't have
+ to be entered in every architecture's
+ <filename class="headerfile">include/asm/unistd.h</filename> and
+ <filename>arch/kernel/entry.S</filename> file, and is much more
+ likely to be accepted by Linus.
+ </para>
+
+ <para>
+ If all your routine does is read or write some parameter, consider
+ implementing a <function>sysfs</function> interface instead.
+ </para>
+
+ <para>
+ Inside the ioctl you're in user context to a process. When a
+ error occurs you return a negated errno (see
+ <filename class="headerfile">include/linux/errno.h</filename>),
+ otherwise you return <returnvalue>0</returnvalue>.
+ </para>
+
+ <para>
+ After you slept you should check if a signal occurred: the
+ Unix/Linux way of handling signals is to temporarily exit the
+ system call with the <constant>-ERESTARTSYS</constant> error. The
+ system call entry code will switch back to user context, process
+ the signal handler and then your system call will be restarted
+ (unless the user disabled that). So you should be prepared to
+ process the restart, e.g. if you're in the middle of manipulating
+ some data structure.
+ </para>
+
+ <programlisting>
+if (signal_pending(current))
+ return -ERESTARTSYS;
+ </programlisting>
+
+ <para>
+ If you're doing longer computations: first think userspace. If you
+ <emphasis>really</emphasis> want to do it in kernel you should
+ regularly check if you need to give up the CPU (remember there is
+ cooperative multitasking per CPU). Idiom:
+ </para>
+
+ <programlisting>
+cond_resched(); /* Will sleep */
+ </programlisting>
+
+ <para>
+ A short note on interface design: the UNIX system call motto is
+ "Provide mechanism not policy".
+ </para>
+ </chapter>
+
+ <chapter id="deadlock-recipes">
+ <title>Recipes for Deadlock</title>
+
+ <para>
+ You cannot call any routines which may sleep, unless:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ You are in user context.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ You do not own any spinlocks.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ You have interrupts enabled (actually, Andi Kleen says
+ that the scheduling code will enable them for you, but
+ that's probably not what you wanted).
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Note that some functions may sleep implicitly: common ones are
+ the user space access functions (*_user) and memory allocation
+ functions without <symbol>GFP_ATOMIC</symbol>.
+ </para>
+
+ <para>
+ You should always compile your kernel
+ <symbol>CONFIG_DEBUG_ATOMIC_SLEEP</symbol> on, and it will warn
+ you if you break these rules. If you <emphasis>do</emphasis> break
+ the rules, you will eventually lock up your box.
+ </para>
+
+ <para>
+ Really.
+ </para>
+ </chapter>
+
+ <chapter id="common-routines">
+ <title>Common Routines</title>
+
+ <sect1 id="routines-printk">
+ <title>
+ <function>printk()</function>
+ <filename class="headerfile">include/linux/kernel.h</filename>
+ </title>
+
+ <para>
+ <function>printk()</function> feeds kernel messages to the
+ console, dmesg, and the syslog daemon. It is useful for debugging
+ and reporting errors, and can be used inside interrupt context,
+ but use with caution: a machine which has its console flooded with
+ printk messages is unusable. It uses a format string mostly
+ compatible with ANSI C printf, and C string concatenation to give
+ it a first "priority" argument:
+ </para>
+
+ <programlisting>
+printk(KERN_INFO "i = %u\n", i);
+ </programlisting>
+
+ <para>
+ See <filename class="headerfile">include/linux/kernel.h</filename>;
+ for other KERN_ values; these are interpreted by syslog as the
+ level. Special case: for printing an IP address use
+ </para>
+
+ <programlisting>
+__be32 ipaddress;
+printk(KERN_INFO "my ip: %pI4\n", &amp;ipaddress);
+ </programlisting>
+
+ <para>
+ <function>printk()</function> internally uses a 1K buffer and does
+ not catch overruns. Make sure that will be enough.
+ </para>
+
+ <note>
+ <para>
+ You will know when you are a real kernel hacker
+ when you start typoing printf as printk in your user programs :)
+ </para>
+ </note>
+
+ <!--- From the Lions book reader department -->
+
+ <note>
+ <para>
+ Another sidenote: the original Unix Version 6 sources had a
+ comment on top of its printf function: "Printf should not be
+ used for chit-chat". You should follow that advice.
+ </para>
+ </note>
+ </sect1>
+
+ <sect1 id="routines-copy">
+ <title>
+ <function>copy_[to/from]_user()</function>
+ /
+ <function>get_user()</function>
+ /
+ <function>put_user()</function>
+ <filename class="headerfile">include/asm/uaccess.h</filename>
+ </title>
+
+ <para>
+ <emphasis>[SLEEPS]</emphasis>
+ </para>
+
+ <para>
+ <function>put_user()</function> and <function>get_user()</function>
+ are used to get and put single values (such as an int, char, or
+ long) from and to userspace. A pointer into userspace should
+ never be simply dereferenced: data should be copied using these
+ routines. Both return <constant>-EFAULT</constant> or 0.
+ </para>
+ <para>
+ <function>copy_to_user()</function> and
+ <function>copy_from_user()</function> are more general: they copy
+ an arbitrary amount of data to and from userspace.
+ <caution>
+ <para>
+ Unlike <function>put_user()</function> and
+ <function>get_user()</function>, they return the amount of
+ uncopied data (ie. <returnvalue>0</returnvalue> still means
+ success).
+ </para>
+ </caution>
+ [Yes, this moronic interface makes me cringe. The flamewar comes up every year or so. --RR.]
+ </para>
+ <para>
+ The functions may sleep implicitly. This should never be called
+ outside user context (it makes no sense), with interrupts
+ disabled, or a spinlock held.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-kmalloc">
+ <title><function>kmalloc()</function>/<function>kfree()</function>
+ <filename class="headerfile">include/linux/slab.h</filename></title>
+
+ <para>
+ <emphasis>[MAY SLEEP: SEE BELOW]</emphasis>
+ </para>
+
+ <para>
+ These routines are used to dynamically request pointer-aligned
+ chunks of memory, like malloc and free do in userspace, but
+ <function>kmalloc()</function> takes an extra flag word.
+ Important values:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>
+ <constant>
+ GFP_KERNEL
+ </constant>
+ </term>
+ <listitem>
+ <para>
+ May sleep and swap to free memory. Only allowed in user
+ context, but is the most reliable way to allocate memory.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <constant>
+ GFP_ATOMIC
+ </constant>
+ </term>
+ <listitem>
+ <para>
+ Don't sleep. Less reliable than <constant>GFP_KERNEL</constant>,
+ but may be called from interrupt context. You should
+ <emphasis>really</emphasis> have a good out-of-memory
+ error-handling strategy.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <constant>
+ GFP_DMA
+ </constant>
+ </term>
+ <listitem>
+ <para>
+ Allocate ISA DMA lower than 16MB. If you don't know what that
+ is you don't need it. Very unreliable.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ If you see a <errorname>sleeping function called from invalid
+ context</errorname> warning message, then maybe you called a
+ sleeping allocation function from interrupt context without
+ <constant>GFP_ATOMIC</constant>. You should really fix that.
+ Run, don't walk.
+ </para>
+
+ <para>
+ If you are allocating at least <constant>PAGE_SIZE</constant>
+ (<filename class="headerfile">include/asm/page.h</filename>) bytes,
+ consider using <function>__get_free_pages()</function>
+
+ (<filename class="headerfile">include/linux/mm.h</filename>). It
+ takes an order argument (0 for page sized, 1 for double page, 2
+ for four pages etc.) and the same memory priority flag word as
+ above.
+ </para>
+
+ <para>
+ If you are allocating more than a page worth of bytes you can use
+ <function>vmalloc()</function>. It'll allocate virtual memory in
+ the kernel map. This block is not contiguous in physical memory,
+ but the <acronym>MMU</acronym> makes it look like it is for you
+ (so it'll only look contiguous to the CPUs, not to external device
+ drivers). If you really need large physically contiguous memory
+ for some weird device, you have a problem: it is poorly supported
+ in Linux because after some time memory fragmentation in a running
+ kernel makes it hard. The best way is to allocate the block early
+ in the boot process via the <function>alloc_bootmem()</function>
+ routine.
+ </para>
+
+ <para>
+ Before inventing your own cache of often-used objects consider
+ using a slab cache in
+ <filename class="headerfile">include/linux/slab.h</filename>
+ </para>
+ </sect1>
+
+ <sect1 id="routines-current">
+ <title><function>current</function>
+ <filename class="headerfile">include/asm/current.h</filename></title>
+
+ <para>
+ This global variable (really a macro) contains a pointer to
+ the current task structure, so is only valid in user context.
+ For example, when a process makes a system call, this will
+ point to the task structure of the calling process. It is
+ <emphasis>not NULL</emphasis> in interrupt context.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-udelay">
+ <title><function>mdelay()</function>/<function>udelay()</function>
+ <filename class="headerfile">include/asm/delay.h</filename>
+ <filename class="headerfile">include/linux/delay.h</filename>
+ </title>
+
+ <para>
+ The <function>udelay()</function> and <function>ndelay()</function> functions can be used for small pauses.
+ Do not use large values with them as you risk
+ overflow - the helper function <function>mdelay()</function> is useful
+ here, or consider <function>msleep()</function>.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-endian">
+ <title><function>cpu_to_be32()</function>/<function>be32_to_cpu()</function>/<function>cpu_to_le32()</function>/<function>le32_to_cpu()</function>
+ <filename class="headerfile">include/asm/byteorder.h</filename>
+ </title>
+
+ <para>
+ The <function>cpu_to_be32()</function> family (where the "32" can
+ be replaced by 64 or 16, and the "be" can be replaced by "le") are
+ the general way to do endian conversions in the kernel: they
+ return the converted value. All variations supply the reverse as
+ well: <function>be32_to_cpu()</function>, etc.
+ </para>
+
+ <para>
+ There are two major variations of these functions: the pointer
+ variation, such as <function>cpu_to_be32p()</function>, which take
+ a pointer to the given type, and return the converted value. The
+ other variation is the "in-situ" family, such as
+ <function>cpu_to_be32s()</function>, which convert value referred
+ to by the pointer, and return void.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-local-irqs">
+ <title><function>local_irq_save()</function>/<function>local_irq_restore()</function>
+ <filename class="headerfile">include/asm/system.h</filename>
+ </title>
+
+ <para>
+ These routines disable hard interrupts on the local CPU, and
+ restore them. They are reentrant; saving the previous state in
+ their one <varname>unsigned long flags</varname> argument. If you
+ know that interrupts are enabled, you can simply use
+ <function>local_irq_disable()</function> and
+ <function>local_irq_enable()</function>.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-softirqs">
+ <title><function>local_bh_disable()</function>/<function>local_bh_enable()</function>
+ <filename class="headerfile">include/linux/interrupt.h</filename></title>
+
+ <para>
+ These routines disable soft interrupts on the local CPU, and
+ restore them. They are reentrant; if soft interrupts were
+ disabled before, they will still be disabled after this pair
+ of functions has been called. They prevent softirqs and tasklets
+ from running on the current CPU.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-processorids">
+ <title><function>smp_processor_id</function>()
+ <filename class="headerfile">include/asm/smp.h</filename></title>
+
+ <para>
+ <function>get_cpu()</function> disables preemption (so you won't
+ suddenly get moved to another CPU) and returns the current
+ processor number, between 0 and <symbol>NR_CPUS</symbol>. Note
+ that the CPU numbers are not necessarily continuous. You return
+ it again with <function>put_cpu()</function> when you are done.
+ </para>
+ <para>
+ If you know you cannot be preempted by another task (ie. you are
+ in interrupt context, or have preemption disabled) you can use
+ smp_processor_id().
+ </para>
+ </sect1>
+
+ <sect1 id="routines-init">
+ <title><type>__init</type>/<type>__exit</type>/<type>__initdata</type>
+ <filename class="headerfile">include/linux/init.h</filename></title>
+
+ <para>
+ After boot, the kernel frees up a special section; functions
+ marked with <type>__init</type> and data structures marked with
+ <type>__initdata</type> are dropped after boot is complete: similarly
+ modules discard this memory after initialization. <type>__exit</type>
+ is used to declare a function which is only required on exit: the
+ function will be dropped if this file is not compiled as a module.
+ See the header file for use. Note that it makes no sense for a function
+ marked with <type>__init</type> to be exported to modules with
+ <function>EXPORT_SYMBOL()</function> - this will break.
+ </para>
+
+ </sect1>
+
+ <sect1 id="routines-init-again">
+ <title><function>__initcall()</function>/<function>module_init()</function>
+ <filename class="headerfile">include/linux/init.h</filename></title>
+ <para>
+ Many parts of the kernel are well served as a module
+ (dynamically-loadable parts of the kernel). Using the
+ <function>module_init()</function> and
+ <function>module_exit()</function> macros it is easy to write code
+ without #ifdefs which can operate both as a module or built into
+ the kernel.
+ </para>
+
+ <para>
+ The <function>module_init()</function> macro defines which
+ function is to be called at module insertion time (if the file is
+ compiled as a module), or at boot time: if the file is not
+ compiled as a module the <function>module_init()</function> macro
+ becomes equivalent to <function>__initcall()</function>, which
+ through linker magic ensures that the function is called on boot.
+ </para>
+
+ <para>
+ The function can return a negative error number to cause
+ module loading to fail (unfortunately, this has no effect if
+ the module is compiled into the kernel). This function is
+ called in user context with interrupts enabled, so it can sleep.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-moduleexit">
+ <title> <function>module_exit()</function>
+ <filename class="headerfile">include/linux/init.h</filename> </title>
+
+ <para>
+ This macro defines the function to be called at module removal
+ time (or never, in the case of the file compiled into the
+ kernel). It will only be called if the module usage count has
+ reached zero. This function can also sleep, but cannot fail:
+ everything must be cleaned up by the time it returns.
+ </para>
+
+ <para>
+ Note that this macro is optional: if it is not present, your
+ module will not be removable (except for 'rmmod -f').
+ </para>
+ </sect1>
+
+ <sect1 id="routines-module-use-counters">
+ <title> <function>try_module_get()</function>/<function>module_put()</function>
+ <filename class="headerfile">include/linux/module.h</filename></title>
+
+ <para>
+ These manipulate the module usage count, to protect against
+ removal (a module also can't be removed if another module uses one
+ of its exported symbols: see below). Before calling into module
+ code, you should call <function>try_module_get()</function> on
+ that module: if it fails, then the module is being removed and you
+ should act as if it wasn't there. Otherwise, you can safely enter
+ the module, and call <function>module_put()</function> when you're
+ finished.
+ </para>
+
+ <para>
+ Most registerable structures have an
+ <structfield>owner</structfield> field, such as in the
+ <structname>file_operations</structname> structure. Set this field
+ to the macro <symbol>THIS_MODULE</symbol>.
+ </para>
+ </sect1>
+
+ <!-- add info on new-style module refcounting here -->
+ </chapter>
+
+ <chapter id="queues">
+ <title>Wait Queues
+ <filename class="headerfile">include/linux/wait.h</filename>
+ </title>
+ <para>
+ <emphasis>[SLEEPS]</emphasis>
+ </para>
+
+ <para>
+ A wait queue is used to wait for someone to wake you up when a
+ certain condition is true. They must be used carefully to ensure
+ there is no race condition. You declare a
+ <type>wait_queue_head_t</type>, and then processes which want to
+ wait for that condition declare a <type>wait_queue_t</type>
+ referring to themselves, and place that in the queue.
+ </para>
+
+ <sect1 id="queue-declaring">
+ <title>Declaring</title>
+
+ <para>
+ You declare a <type>wait_queue_head_t</type> using the
+ <function>DECLARE_WAIT_QUEUE_HEAD()</function> macro, or using the
+ <function>init_waitqueue_head()</function> routine in your
+ initialization code.
+ </para>
+ </sect1>
+
+ <sect1 id="queue-waitqueue">
+ <title>Queuing</title>
+
+ <para>
+ Placing yourself in the waitqueue is fairly complex, because you
+ must put yourself in the queue before checking the condition.
+ There is a macro to do this:
+ <function>wait_event_interruptible()</function>
+
+ <filename class="headerfile">include/linux/wait.h</filename> The
+ first argument is the wait queue head, and the second is an
+ expression which is evaluated; the macro returns
+ <returnvalue>0</returnvalue> when this expression is true, or
+ <returnvalue>-ERESTARTSYS</returnvalue> if a signal is received.
+ The <function>wait_event()</function> version ignores signals.
+ </para>
+ <para>
+ Do not use the <function>sleep_on()</function> function family -
+ it is very easy to accidentally introduce races; almost certainly
+ one of the <function>wait_event()</function> family will do, or a
+ loop around <function>schedule_timeout()</function>. If you choose
+ to loop around <function>schedule_timeout()</function> remember
+ you must set the task state (with
+ <function>set_current_state()</function>) on each iteration to avoid
+ busy-looping.
+ </para>
+
+ </sect1>
+
+ <sect1 id="queue-waking">
+ <title>Waking Up Queued Tasks</title>
+
+ <para>
+ Call <function>wake_up()</function>
+
+ <filename class="headerfile">include/linux/wait.h</filename>;,
+ which will wake up every process in the queue. The exception is
+ if one has <constant>TASK_EXCLUSIVE</constant> set, in which case
+ the remainder of the queue will not be woken. There are other variants
+ of this basic function available in the same header.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="atomic-ops">
+ <title>Atomic Operations</title>
+
+ <para>
+ Certain operations are guaranteed atomic on all platforms. The
+ first class of operations work on <type>atomic_t</type>
+
+ <filename class="headerfile">include/asm/atomic.h</filename>; this
+ contains a signed integer (at least 32 bits long), and you must use
+ these functions to manipulate or read atomic_t variables.
+ <function>atomic_read()</function> and
+ <function>atomic_set()</function> get and set the counter,
+ <function>atomic_add()</function>,
+ <function>atomic_sub()</function>,
+ <function>atomic_inc()</function>,
+ <function>atomic_dec()</function>, and
+ <function>atomic_dec_and_test()</function> (returns
+ <returnvalue>true</returnvalue> if it was decremented to zero).
+ </para>
+
+ <para>
+ Yes. It returns <returnvalue>true</returnvalue> (i.e. != 0) if the
+ atomic variable is zero.
+ </para>
+
+ <para>
+ Note that these functions are slower than normal arithmetic, and
+ so should not be used unnecessarily.
+ </para>
+
+ <para>
+ The second class of atomic operations is atomic bit operations on an
+ <type>unsigned long</type>, defined in
+
+ <filename class="headerfile">include/linux/bitops.h</filename>. These
+ operations generally take a pointer to the bit pattern, and a bit
+ number: 0 is the least significant bit.
+ <function>set_bit()</function>, <function>clear_bit()</function>
+ and <function>change_bit()</function> set, clear, and flip the
+ given bit. <function>test_and_set_bit()</function>,
+ <function>test_and_clear_bit()</function> and
+ <function>test_and_change_bit()</function> do the same thing,
+ except return true if the bit was previously set; these are
+ particularly useful for atomically setting flags.
+ </para>
+
+ <para>
+ It is possible to call these operations with bit indices greater
+ than BITS_PER_LONG. The resulting behavior is strange on big-endian
+ platforms though so it is a good idea not to do this.
+ </para>
+ </chapter>
+
+ <chapter id="symbols">
+ <title>Symbols</title>
+
+ <para>
+ Within the kernel proper, the normal linking rules apply
+ (ie. unless a symbol is declared to be file scope with the
+ <type>static</type> keyword, it can be used anywhere in the
+ kernel). However, for modules, a special exported symbol table is
+ kept which limits the entry points to the kernel proper. Modules
+ can also export symbols.
+ </para>
+
+ <sect1 id="sym-exportsymbols">
+ <title><function>EXPORT_SYMBOL()</function>
+ <filename class="headerfile">include/linux/export.h</filename></title>
+
+ <para>
+ This is the classic method of exporting a symbol: dynamically
+ loaded modules will be able to use the symbol as normal.
+ </para>
+ </sect1>
+
+ <sect1 id="sym-exportsymbols-gpl">
+ <title><function>EXPORT_SYMBOL_GPL()</function>
+ <filename class="headerfile">include/linux/export.h</filename></title>
+
+ <para>
+ Similar to <function>EXPORT_SYMBOL()</function> except that the
+ symbols exported by <function>EXPORT_SYMBOL_GPL()</function> can
+ only be seen by modules with a
+ <function>MODULE_LICENSE()</function> that specifies a GPL
+ compatible license. It implies that the function is considered
+ an internal implementation issue, and not really an interface.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="conventions">
+ <title>Routines and Conventions</title>
+
+ <sect1 id="conventions-doublelinkedlist">
+ <title>Double-linked lists
+ <filename class="headerfile">include/linux/list.h</filename></title>
+
+ <para>
+ There used to be three sets of linked-list routines in the kernel
+ headers, but this one is the winner. If you don't have some
+ particular pressing need for a single list, it's a good choice.
+ </para>
+
+ <para>
+ In particular, <function>list_for_each_entry</function> is useful.
+ </para>
+ </sect1>
+
+ <sect1 id="convention-returns">
+ <title>Return Conventions</title>
+
+ <para>
+ For code called in user context, it's very common to defy C
+ convention, and return <returnvalue>0</returnvalue> for success,
+ and a negative error number
+ (eg. <returnvalue>-EFAULT</returnvalue>) for failure. This can be
+ unintuitive at first, but it's fairly widespread in the kernel.
+ </para>
+
+ <para>
+ Using <function>ERR_PTR()</function>
+
+ <filename class="headerfile">include/linux/err.h</filename>; to
+ encode a negative error number into a pointer, and
+ <function>IS_ERR()</function> and <function>PTR_ERR()</function>
+ to get it back out again: avoids a separate pointer parameter for
+ the error number. Icky, but in a good way.
+ </para>
+ </sect1>
+
+ <sect1 id="conventions-borkedcompile">
+ <title>Breaking Compilation</title>
+
+ <para>
+ Linus and the other developers sometimes change function or
+ structure names in development kernels; this is not done just to
+ keep everyone on their toes: it reflects a fundamental change
+ (eg. can no longer be called with interrupts on, or does extra
+ checks, or doesn't do checks which were caught before). Usually
+ this is accompanied by a fairly complete note to the linux-kernel
+ mailing list; search the archive. Simply doing a global replace
+ on the file usually makes things <emphasis>worse</emphasis>.
+ </para>
+ </sect1>
+
+ <sect1 id="conventions-initialising">
+ <title>Initializing structure members</title>
+
+ <para>
+ The preferred method of initializing structures is to use
+ designated initialisers, as defined by ISO C99, eg:
+ </para>
+ <programlisting>
+static struct block_device_operations opt_fops = {
+ .open = opt_open,
+ .release = opt_release,
+ .ioctl = opt_ioctl,
+ .check_media_change = opt_media_change,
+};
+ </programlisting>
+ <para>
+ This makes it easy to grep for, and makes it clear which
+ structure fields are set. You should do this because it looks
+ cool.
+ </para>
+ </sect1>
+
+ <sect1 id="conventions-gnu-extns">
+ <title>GNU Extensions</title>
+
+ <para>
+ GNU Extensions are explicitly allowed in the Linux kernel.
+ Note that some of the more complex ones are not very well
+ supported, due to lack of general use, but the following are
+ considered standard (see the GCC info page section "C
+ Extensions" for more details - Yes, really the info page, the
+ man page is only a short summary of the stuff in info).
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ Inline functions
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Statement expressions (ie. the ({ and }) constructs).
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Declaring attributes of a function / variable / type
+ (__attribute__)
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ typeof
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Zero length arrays
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Macro varargs
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Arithmetic on void pointers
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Non-Constant initializers
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Assembler Instructions (not outside arch/ and include/asm/)
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Function names as strings (__func__).
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ __builtin_constant_p()
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Be wary when using long long in the kernel, the code gcc generates for
+ it is horrible and worse: division and multiplication does not work
+ on i386 because the GCC runtime functions for it are missing from
+ the kernel environment.
+ </para>
+
+ <!-- FIXME: add a note about ANSI aliasing cleanness -->
+ </sect1>
+
+ <sect1 id="conventions-cplusplus">
+ <title>C++</title>
+
+ <para>
+ Using C++ in the kernel is usually a bad idea, because the
+ kernel does not provide the necessary runtime environment
+ and the include files are not tested for it. It is still
+ possible, but not recommended. If you really want to do
+ this, forget about exceptions at least.
+ </para>
+ </sect1>
+
+ <sect1 id="conventions-ifdef">
+ <title>&num;if</title>
+
+ <para>
+ It is generally considered cleaner to use macros in header files
+ (or at the top of .c files) to abstract away functions rather than
+ using `#if' pre-processor statements throughout the source code.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="submitting">
+ <title>Putting Your Stuff in the Kernel</title>
+
+ <para>
+ In order to get your stuff into shape for official inclusion, or
+ even to make a neat patch, there's administrative work to be
+ done:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ Figure out whose pond you've been pissing in. Look at the top of
+ the source files, inside the <filename>MAINTAINERS</filename>
+ file, and last of all in the <filename>CREDITS</filename> file.
+ You should coordinate with this person to make sure you're not
+ duplicating effort, or trying something that's already been
+ rejected.
+ </para>
+
+ <para>
+ Make sure you put your name and EMail address at the top of
+ any files you create or mangle significantly. This is the
+ first place people will look when they find a bug, or when
+ <emphasis>they</emphasis> want to make a change.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Usually you want a configuration option for your kernel hack.
+ Edit <filename>Kconfig</filename> in the appropriate directory.
+ The Config language is simple to use by cut and paste, and there's
+ complete documentation in
+ <filename>Documentation/kbuild/kconfig-language.txt</filename>.
+ </para>
+
+ <para>
+ In your description of the option, make sure you address both the
+ expert user and the user who knows nothing about your feature. Mention
+ incompatibilities and issues here. <emphasis> Definitely
+ </emphasis> end your description with <quote> if in doubt, say N
+ </quote> (or, occasionally, `Y'); this is for people who have no
+ idea what you are talking about.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Edit the <filename>Makefile</filename>: the CONFIG variables are
+ exported here so you can usually just add a "obj-$(CONFIG_xxx) +=
+ xxx.o" line. The syntax is documented in
+ <filename>Documentation/kbuild/makefiles.txt</filename>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Put yourself in <filename>CREDITS</filename> if you've done
+ something noteworthy, usually beyond a single file (your name
+ should be at the top of the source files anyway).
+ <filename>MAINTAINERS</filename> means you want to be consulted
+ when changes are made to a subsystem, and hear about bugs; it
+ implies a more-than-passing commitment to some part of the code.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Finally, don't forget to read <filename>Documentation/SubmittingPatches</filename>
+ and possibly <filename>Documentation/SubmittingDrivers</filename>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </chapter>
+
+ <chapter id="cantrips">
+ <title>Kernel Cantrips</title>
+
+ <para>
+ Some favorites from browsing the source. Feel free to add to this
+ list.
+ </para>
+
+ <para>
+ <filename>arch/x86/include/asm/delay.h:</filename>
+ </para>
+ <programlisting>
+#define ndelay(n) (__builtin_constant_p(n) ? \
+ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
+ __ndelay(n))
+ </programlisting>
+
+ <para>
+ <filename>include/linux/fs.h</filename>:
+ </para>
+ <programlisting>
+/*
+ * Kernel pointers have redundant information, so we can use a
+ * scheme where we can return either an error code or a dentry
+ * pointer with the same return value.
+ *
+ * This should be a per-architecture thing, to allow different
+ * error and pointer decisions.
+ */
+ #define ERR_PTR(err) ((void *)((long)(err)))
+ #define PTR_ERR(ptr) ((long)(ptr))
+ #define IS_ERR(ptr) ((unsigned long)(ptr) > (unsigned long)(-1000))
+</programlisting>
+
+ <para>
+ <filename>arch/x86/include/asm/uaccess_32.h:</filename>
+ </para>
+
+ <programlisting>
+#define copy_to_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_to_user((to),(from),(n)) : \
+ __generic_copy_to_user((to),(from),(n)))
+ </programlisting>
+
+ <para>
+ <filename>arch/sparc/kernel/head.S:</filename>
+ </para>
+
+ <programlisting>
+/*
+ * Sun people can't spell worth damn. "compatability" indeed.
+ * At least we *know* we can't spell, and use a spell-checker.
+ */
+
+/* Uh, actually Linus it is I who cannot spell. Too much murky
+ * Sparc assembly will do this to ya.
+ */
+C_LABEL(cputypvar):
+ .asciz "compatibility"
+
+/* Tested on SS-5, SS-10. Probably someone at Sun applied a spell-checker. */
+ .align 4
+C_LABEL(cputypvar_sun4m):
+ .asciz "compatible"
+ </programlisting>
+
+ <para>
+ <filename>arch/sparc/lib/checksum.S:</filename>
+ </para>
+
+ <programlisting>
+ /* Sun, you just can't beat me, you just can't. Stop trying,
+ * give up. I'm serious, I am going to kick the living shit
+ * out of you, game over, lights out.
+ */
+ </programlisting>
+ </chapter>
+
+ <chapter id="credits">
+ <title>Thanks</title>
+
+ <para>
+ Thanks to Andi Kleen for the idea, answering my questions, fixing
+ my mistakes, filling content, etc. Philipp Rumpf for more spelling
+ and clarity fixes, and some excellent non-obvious points. Werner
+ Almesberger for giving me a great summary of
+ <function>disable_irq()</function>, and Jes Sorensen and Andrea
+ Arcangeli added caveats. Michael Elizabeth Chastain for checking
+ and adding to the Configure section. <!-- Rusty insisted on this
+ bit; I didn't do it! --> Telsa Gwynne for teaching me DocBook.
+ </para>
+ </chapter>
+</book>
+
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
new file mode 100644
index 00000000..67e7ab41
--- /dev/null
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -0,0 +1,2146 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LKLockingGuide">
+ <bookinfo>
+ <title>Unreliable Guide To Locking</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Rusty</firstname>
+ <surname>Russell</surname>
+ <affiliation>
+ <address>
+ <email>rusty@rustcorp.com.au</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2003</year>
+ <holder>Rusty Russell</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ Welcome, to Rusty's Remarkably Unreliable Guide to Kernel
+ Locking issues. This document describes the locking systems in
+ the Linux Kernel in 2.6.
+ </para>
+ <para>
+ With the wide availability of HyperThreading, and <firstterm
+ linkend="gloss-preemption">preemption </firstterm> in the Linux
+ Kernel, everyone hacking on the kernel needs to know the
+ fundamentals of concurrency and locking for
+ <firstterm linkend="gloss-smp"><acronym>SMP</acronym></firstterm>.
+ </para>
+ </chapter>
+
+ <chapter id="races">
+ <title>The Problem With Concurrency</title>
+ <para>
+ (Skip this if you know what a Race Condition is).
+ </para>
+ <para>
+ In a normal program, you can increment a counter like so:
+ </para>
+ <programlisting>
+ very_important_count++;
+ </programlisting>
+
+ <para>
+ This is what they would expect to happen:
+ </para>
+
+ <table>
+ <title>Expected Results</title>
+
+ <tgroup cols="2" align="left">
+
+ <thead>
+ <row>
+ <entry>Instance 1</entry>
+ <entry>Instance 2</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry>read very_important_count (5)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>add 1 (6)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>write very_important_count (6)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>read very_important_count (6)</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>add 1 (7)</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>write very_important_count (7)</entry>
+ </row>
+ </tbody>
+
+ </tgroup>
+ </table>
+
+ <para>
+ This is what might happen:
+ </para>
+
+ <table>
+ <title>Possible Results</title>
+
+ <tgroup cols="2" align="left">
+ <thead>
+ <row>
+ <entry>Instance 1</entry>
+ <entry>Instance 2</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry>read very_important_count (5)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>read very_important_count (5)</entry>
+ </row>
+ <row>
+ <entry>add 1 (6)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>add 1 (6)</entry>
+ </row>
+ <row>
+ <entry>write very_important_count (6)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>write very_important_count (6)</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <sect1 id="race-condition">
+ <title>Race Conditions and Critical Regions</title>
+ <para>
+ This overlap, where the result depends on the
+ relative timing of multiple tasks, is called a <firstterm>race condition</firstterm>.
+ The piece of code containing the concurrency issue is called a
+ <firstterm>critical region</firstterm>. And especially since Linux starting running
+ on SMP machines, they became one of the major issues in kernel
+ design and implementation.
+ </para>
+ <para>
+ Preemption can have the same effect, even if there is only one
+ CPU: by preempting one task during the critical region, we have
+ exactly the same race condition. In this case the thread which
+ preempts might run the critical region itself.
+ </para>
+ <para>
+ The solution is to recognize when these simultaneous accesses
+ occur, and use locks to make sure that only one instance can
+ enter the critical region at any time. There are many
+ friendly primitives in the Linux kernel to help you do this.
+ And then there are the unfriendly primitives, but I'll pretend
+ they don't exist.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="locks">
+ <title>Locking in the Linux Kernel</title>
+
+ <para>
+ If I could give you one piece of advice: never sleep with anyone
+ crazier than yourself. But if I had to give you advice on
+ locking: <emphasis>keep it simple</emphasis>.
+ </para>
+
+ <para>
+ Be reluctant to introduce new locks.
+ </para>
+
+ <para>
+ Strangely enough, this last one is the exact reverse of my advice when
+ you <emphasis>have</emphasis> slept with someone crazier than yourself.
+ And you should think about getting a big dog.
+ </para>
+
+ <sect1 id="lock-intro">
+ <title>Two Main Types of Kernel Locks: Spinlocks and Mutexes</title>
+
+ <para>
+ There are two main types of kernel locks. The fundamental type
+ is the spinlock
+ (<filename class="headerfile">include/asm/spinlock.h</filename>),
+ which is a very simple single-holder lock: if you can't get the
+ spinlock, you keep trying (spinning) until you can. Spinlocks are
+ very small and fast, and can be used anywhere.
+ </para>
+ <para>
+ The second type is a mutex
+ (<filename class="headerfile">include/linux/mutex.h</filename>): it
+ is like a spinlock, but you may block holding a mutex.
+ If you can't lock a mutex, your task will suspend itself, and be woken
+ up when the mutex is released. This means the CPU can do something
+ else while you are waiting. There are many cases when you simply
+ can't sleep (see <xref linkend="sleeping-things"/>), and so have to
+ use a spinlock instead.
+ </para>
+ <para>
+ Neither type of lock is recursive: see
+ <xref linkend="deadlock"/>.
+ </para>
+ </sect1>
+
+ <sect1 id="uniprocessor">
+ <title>Locks and Uniprocessor Kernels</title>
+
+ <para>
+ For kernels compiled without <symbol>CONFIG_SMP</symbol>, and
+ without <symbol>CONFIG_PREEMPT</symbol> spinlocks do not exist at
+ all. This is an excellent design decision: when no-one else can
+ run at the same time, there is no reason to have a lock.
+ </para>
+
+ <para>
+ If the kernel is compiled without <symbol>CONFIG_SMP</symbol>,
+ but <symbol>CONFIG_PREEMPT</symbol> is set, then spinlocks
+ simply disable preemption, which is sufficient to prevent any
+ races. For most purposes, we can think of preemption as
+ equivalent to SMP, and not worry about it separately.
+ </para>
+
+ <para>
+ You should always test your locking code with <symbol>CONFIG_SMP</symbol>
+ and <symbol>CONFIG_PREEMPT</symbol> enabled, even if you don't have an SMP test box, because it
+ will still catch some kinds of locking bugs.
+ </para>
+
+ <para>
+ Mutexes still exist, because they are required for
+ synchronization between <firstterm linkend="gloss-usercontext">user
+ contexts</firstterm>, as we will see below.
+ </para>
+ </sect1>
+
+ <sect1 id="usercontextlocking">
+ <title>Locking Only In User Context</title>
+
+ <para>
+ If you have a data structure which is only ever accessed from
+ user context, then you can use a simple mutex
+ (<filename>include/linux/mutex.h</filename>) to protect it. This
+ is the most trivial case: you initialize the mutex. Then you can
+ call <function>mutex_lock_interruptible()</function> to grab the mutex,
+ and <function>mutex_unlock()</function> to release it. There is also a
+ <function>mutex_lock()</function>, which should be avoided, because it
+ will not return if a signal is received.
+ </para>
+
+ <para>
+ Example: <filename>net/netfilter/nf_sockopt.c</filename> allows
+ registration of new <function>setsockopt()</function> and
+ <function>getsockopt()</function> calls, with
+ <function>nf_register_sockopt()</function>. Registration and
+ de-registration are only done on module load and unload (and boot
+ time, where there is no concurrency), and the list of registrations
+ is only consulted for an unknown <function>setsockopt()</function>
+ or <function>getsockopt()</function> system call. The
+ <varname>nf_sockopt_mutex</varname> is perfect to protect this,
+ especially since the setsockopt and getsockopt calls may well
+ sleep.
+ </para>
+ </sect1>
+
+ <sect1 id="lock-user-bh">
+ <title>Locking Between User Context and Softirqs</title>
+
+ <para>
+ If a <firstterm linkend="gloss-softirq">softirq</firstterm> shares
+ data with user context, you have two problems. Firstly, the current
+ user context can be interrupted by a softirq, and secondly, the
+ critical region could be entered from another CPU. This is where
+ <function>spin_lock_bh()</function>
+ (<filename class="headerfile">include/linux/spinlock.h</filename>) is
+ used. It disables softirqs on that CPU, then grabs the lock.
+ <function>spin_unlock_bh()</function> does the reverse. (The
+ '_bh' suffix is a historical reference to "Bottom Halves", the
+ old name for software interrupts. It should really be
+ called spin_lock_softirq()' in a perfect world).
+ </para>
+
+ <para>
+ Note that you can also use <function>spin_lock_irq()</function>
+ or <function>spin_lock_irqsave()</function> here, which stop
+ hardware interrupts as well: see <xref linkend="hardirq-context"/>.
+ </para>
+
+ <para>
+ This works perfectly for <firstterm linkend="gloss-up"><acronym>UP
+ </acronym></firstterm> as well: the spin lock vanishes, and this macro
+ simply becomes <function>local_bh_disable()</function>
+ (<filename class="headerfile">include/linux/interrupt.h</filename>), which
+ protects you from the softirq being run.
+ </para>
+ </sect1>
+
+ <sect1 id="lock-user-tasklet">
+ <title>Locking Between User Context and Tasklets</title>
+
+ <para>
+ This is exactly the same as above, because <firstterm
+ linkend="gloss-tasklet">tasklets</firstterm> are actually run
+ from a softirq.
+ </para>
+ </sect1>
+
+ <sect1 id="lock-user-timers">
+ <title>Locking Between User Context and Timers</title>
+
+ <para>
+ This, too, is exactly the same as above, because <firstterm
+ linkend="gloss-timers">timers</firstterm> are actually run from
+ a softirq. From a locking point of view, tasklets and timers
+ are identical.
+ </para>
+ </sect1>
+
+ <sect1 id="lock-tasklets">
+ <title>Locking Between Tasklets/Timers</title>
+
+ <para>
+ Sometimes a tasklet or timer might want to share data with
+ another tasklet or timer.
+ </para>
+
+ <sect2 id="lock-tasklets-same">
+ <title>The Same Tasklet/Timer</title>
+ <para>
+ Since a tasklet is never run on two CPUs at once, you don't
+ need to worry about your tasklet being reentrant (running
+ twice at once), even on SMP.
+ </para>
+ </sect2>
+
+ <sect2 id="lock-tasklets-different">
+ <title>Different Tasklets/Timers</title>
+ <para>
+ If another tasklet/timer wants
+ to share data with your tasklet or timer , you will both need to use
+ <function>spin_lock()</function> and
+ <function>spin_unlock()</function> calls.
+ <function>spin_lock_bh()</function> is
+ unnecessary here, as you are already in a tasklet, and
+ none will be run on the same CPU.
+ </para>
+ </sect2>
+ </sect1>
+
+ <sect1 id="lock-softirqs">
+ <title>Locking Between Softirqs</title>
+
+ <para>
+ Often a softirq might
+ want to share data with itself or a tasklet/timer.
+ </para>
+
+ <sect2 id="lock-softirqs-same">
+ <title>The Same Softirq</title>
+
+ <para>
+ The same softirq can run on the other CPUs: you can use a
+ per-CPU array (see <xref linkend="per-cpu"/>) for better
+ performance. If you're going so far as to use a softirq,
+ you probably care about scalable performance enough
+ to justify the extra complexity.
+ </para>
+
+ <para>
+ You'll need to use <function>spin_lock()</function> and
+ <function>spin_unlock()</function> for shared data.
+ </para>
+ </sect2>
+
+ <sect2 id="lock-softirqs-different">
+ <title>Different Softirqs</title>
+
+ <para>
+ You'll need to use <function>spin_lock()</function> and
+ <function>spin_unlock()</function> for shared data, whether it
+ be a timer, tasklet, different softirq or the same or another
+ softirq: any of them could be running on a different CPU.
+ </para>
+ </sect2>
+ </sect1>
+ </chapter>
+
+ <chapter id="hardirq-context">
+ <title>Hard IRQ Context</title>
+
+ <para>
+ Hardware interrupts usually communicate with a
+ tasklet or softirq. Frequently this involves putting work in a
+ queue, which the softirq will take out.
+ </para>
+
+ <sect1 id="hardirq-softirq">
+ <title>Locking Between Hard IRQ and Softirqs/Tasklets</title>
+
+ <para>
+ If a hardware irq handler shares data with a softirq, you have
+ two concerns. Firstly, the softirq processing can be
+ interrupted by a hardware interrupt, and secondly, the
+ critical region could be entered by a hardware interrupt on
+ another CPU. This is where <function>spin_lock_irq()</function> is
+ used. It is defined to disable interrupts on that cpu, then grab
+ the lock. <function>spin_unlock_irq()</function> does the reverse.
+ </para>
+
+ <para>
+ The irq handler does not to use
+ <function>spin_lock_irq()</function>, because the softirq cannot
+ run while the irq handler is running: it can use
+ <function>spin_lock()</function>, which is slightly faster. The
+ only exception would be if a different hardware irq handler uses
+ the same lock: <function>spin_lock_irq()</function> will stop
+ that from interrupting us.
+ </para>
+
+ <para>
+ This works perfectly for UP as well: the spin lock vanishes,
+ and this macro simply becomes <function>local_irq_disable()</function>
+ (<filename class="headerfile">include/asm/smp.h</filename>), which
+ protects you from the softirq/tasklet/BH being run.
+ </para>
+
+ <para>
+ <function>spin_lock_irqsave()</function>
+ (<filename>include/linux/spinlock.h</filename>) is a variant
+ which saves whether interrupts were on or off in a flags word,
+ which is passed to <function>spin_unlock_irqrestore()</function>. This
+ means that the same code can be used inside an hard irq handler (where
+ interrupts are already off) and in softirqs (where the irq
+ disabling is required).
+ </para>
+
+ <para>
+ Note that softirqs (and hence tasklets and timers) are run on
+ return from hardware interrupts, so
+ <function>spin_lock_irq()</function> also stops these. In that
+ sense, <function>spin_lock_irqsave()</function> is the most
+ general and powerful locking function.
+ </para>
+
+ </sect1>
+ <sect1 id="hardirq-hardirq">
+ <title>Locking Between Two Hard IRQ Handlers</title>
+ <para>
+ It is rare to have to share data between two IRQ handlers, but
+ if you do, <function>spin_lock_irqsave()</function> should be
+ used: it is architecture-specific whether all interrupts are
+ disabled inside irq handlers themselves.
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="cheatsheet">
+ <title>Cheat Sheet For Locking</title>
+ <para>
+ Pete Zaitcev gives the following summary:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ If you are in a process context (any syscall) and want to
+ lock other process out, use a mutex. You can take a mutex
+ and sleep (<function>copy_from_user*(</function> or
+ <function>kmalloc(x,GFP_KERNEL)</function>).
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Otherwise (== data can be touched in an interrupt), use
+ <function>spin_lock_irqsave()</function> and
+ <function>spin_unlock_irqrestore()</function>.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Avoid holding spinlock for more than 5 lines of code and
+ across any function call (except accessors like
+ <function>readb</function>).
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <sect1 id="minimum-lock-reqirements">
+ <title>Table of Minimum Requirements</title>
+
+ <para> The following table lists the <emphasis>minimum</emphasis>
+ locking requirements between various contexts. In some cases,
+ the same context can only be running on one CPU at a time, so
+ no locking is required for that context (eg. a particular
+ thread can only run on one CPU at a time, but if it needs
+ shares data with another thread, locking is required).
+ </para>
+ <para>
+ Remember the advice above: you can always use
+ <function>spin_lock_irqsave()</function>, which is a superset
+ of all other spinlock primitives.
+ </para>
+
+ <table>
+<title>Table of Locking Requirements</title>
+<tgroup cols="11">
+<tbody>
+
+<row>
+<entry></entry>
+<entry>IRQ Handler A</entry>
+<entry>IRQ Handler B</entry>
+<entry>Softirq A</entry>
+<entry>Softirq B</entry>
+<entry>Tasklet A</entry>
+<entry>Tasklet B</entry>
+<entry>Timer A</entry>
+<entry>Timer B</entry>
+<entry>User Context A</entry>
+<entry>User Context B</entry>
+</row>
+
+<row>
+<entry>IRQ Handler A</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>IRQ Handler B</entry>
+<entry>SLIS</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Softirq A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+</row>
+
+<row>
+<entry>Softirq B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+</row>
+
+<row>
+<entry>Tasklet A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Tasklet B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Timer A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Timer B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>User Context A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>User Context B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>MLI</entry>
+<entry>None</entry>
+</row>
+
+</tbody>
+</tgroup>
+</table>
+
+ <table>
+<title>Legend for Locking Requirements Table</title>
+<tgroup cols="2">
+<tbody>
+
+<row>
+<entry>SLIS</entry>
+<entry>spin_lock_irqsave</entry>
+</row>
+<row>
+<entry>SLI</entry>
+<entry>spin_lock_irq</entry>
+</row>
+<row>
+<entry>SL</entry>
+<entry>spin_lock</entry>
+</row>
+<row>
+<entry>SLBH</entry>
+<entry>spin_lock_bh</entry>
+</row>
+<row>
+<entry>MLI</entry>
+<entry>mutex_lock_interruptible</entry>
+</row>
+
+</tbody>
+</tgroup>
+</table>
+
+</sect1>
+</chapter>
+
+<chapter id="trylock-functions">
+ <title>The trylock Functions</title>
+ <para>
+ There are functions that try to acquire a lock only once and immediately
+ return a value telling about success or failure to acquire the lock.
+ They can be used if you need no access to the data protected with the lock
+ when some other thread is holding the lock. You should acquire the lock
+ later if you then need access to the data protected with the lock.
+ </para>
+
+ <para>
+ <function>spin_trylock()</function> does not spin but returns non-zero if
+ it acquires the spinlock on the first try or 0 if not. This function can
+ be used in all contexts like <function>spin_lock</function>: you must have
+ disabled the contexts that might interrupt you and acquire the spin lock.
+ </para>
+
+ <para>
+ <function>mutex_trylock()</function> does not suspend your task
+ but returns non-zero if it could lock the mutex on the first try
+ or 0 if not. This function cannot be safely used in hardware or software
+ interrupt contexts despite not sleeping.
+ </para>
+</chapter>
+
+ <chapter id="Examples">
+ <title>Common Examples</title>
+ <para>
+Let's step through a simple example: a cache of number to name
+mappings. The cache keeps a count of how often each of the objects is
+used, and when it gets full, throws out the least used one.
+
+ </para>
+
+ <sect1 id="examples-usercontext">
+ <title>All In User Context</title>
+ <para>
+For our first example, we assume that all operations are in user
+context (ie. from system calls), so we can sleep. This means we can
+use a mutex to protect the cache and all the objects within
+it. Here's the code:
+ </para>
+
+ <programlisting>
+#include &lt;linux/list.h&gt;
+#include &lt;linux/slab.h&gt;
+#include &lt;linux/string.h&gt;
+#include &lt;linux/mutex.h&gt;
+#include &lt;asm/errno.h&gt;
+
+struct object
+{
+ struct list_head list;
+ int id;
+ char name[32];
+ int popularity;
+};
+
+/* Protects the cache, cache_num, and the objects within it */
+static DEFINE_MUTEX(cache_lock);
+static LIST_HEAD(cache);
+static unsigned int cache_num = 0;
+#define MAX_CACHE_SIZE 10
+
+/* Must be holding cache_lock */
+static struct object *__cache_find(int id)
+{
+ struct object *i;
+
+ list_for_each_entry(i, &amp;cache, list)
+ if (i-&gt;id == id) {
+ i-&gt;popularity++;
+ return i;
+ }
+ return NULL;
+}
+
+/* Must be holding cache_lock */
+static void __cache_delete(struct object *obj)
+{
+ BUG_ON(!obj);
+ list_del(&amp;obj-&gt;list);
+ kfree(obj);
+ cache_num--;
+}
+
+/* Must be holding cache_lock */
+static void __cache_add(struct object *obj)
+{
+ list_add(&amp;obj-&gt;list, &amp;cache);
+ if (++cache_num > MAX_CACHE_SIZE) {
+ struct object *i, *outcast = NULL;
+ list_for_each_entry(i, &amp;cache, list) {
+ if (!outcast || i-&gt;popularity &lt; outcast-&gt;popularity)
+ outcast = i;
+ }
+ __cache_delete(outcast);
+ }
+}
+
+int cache_add(int id, const char *name)
+{
+ struct object *obj;
+
+ if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL)
+ return -ENOMEM;
+
+ strlcpy(obj-&gt;name, name, sizeof(obj-&gt;name));
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
+
+ mutex_lock(&amp;cache_lock);
+ __cache_add(obj);
+ mutex_unlock(&amp;cache_lock);
+ return 0;
+}
+
+void cache_delete(int id)
+{
+ mutex_lock(&amp;cache_lock);
+ __cache_delete(__cache_find(id));
+ mutex_unlock(&amp;cache_lock);
+}
+
+int cache_find(int id, char *name)
+{
+ struct object *obj;
+ int ret = -ENOENT;
+
+ mutex_lock(&amp;cache_lock);
+ obj = __cache_find(id);
+ if (obj) {
+ ret = 0;
+ strcpy(name, obj-&gt;name);
+ }
+ mutex_unlock(&amp;cache_lock);
+ return ret;
+}
+</programlisting>
+
+ <para>
+Note that we always make sure we have the cache_lock when we add,
+delete, or look up the cache: both the cache infrastructure itself and
+the contents of the objects are protected by the lock. In this case
+it's easy, since we copy the data for the user, and never let them
+access the objects directly.
+ </para>
+ <para>
+There is a slight (and common) optimization here: in
+<function>cache_add</function> we set up the fields of the object
+before grabbing the lock. This is safe, as no-one else can access it
+until we put it in cache.
+ </para>
+ </sect1>
+
+ <sect1 id="examples-interrupt">
+ <title>Accessing From Interrupt Context</title>
+ <para>
+Now consider the case where <function>cache_find</function> can be
+called from interrupt context: either a hardware interrupt or a
+softirq. An example would be a timer which deletes object from the
+cache.
+ </para>
+ <para>
+The change is shown below, in standard patch format: the
+<symbol>-</symbol> are lines which are taken away, and the
+<symbol>+</symbol> are lines which are added.
+ </para>
+<programlisting>
+--- cache.c.usercontext 2003-12-09 13:58:54.000000000 +1100
++++ cache.c.interrupt 2003-12-09 14:07:49.000000000 +1100
+@@ -12,7 +12,7 @@
+ int popularity;
+ };
+
+-static DEFINE_MUTEX(cache_lock);
++static DEFINE_SPINLOCK(cache_lock);
+ static LIST_HEAD(cache);
+ static unsigned int cache_num = 0;
+ #define MAX_CACHE_SIZE 10
+@@ -55,6 +55,7 @@
+ int cache_add(int id, const char *name)
+ {
+ struct object *obj;
++ unsigned long flags;
+
+ if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL)
+ return -ENOMEM;
+@@ -63,30 +64,33 @@
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
+
+- mutex_lock(&amp;cache_lock);
++ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_add(obj);
+- mutex_unlock(&amp;cache_lock);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ return 0;
+ }
+
+ void cache_delete(int id)
+ {
+- mutex_lock(&amp;cache_lock);
++ unsigned long flags;
++
++ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_delete(__cache_find(id));
+- mutex_unlock(&amp;cache_lock);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ }
+
+ int cache_find(int id, char *name)
+ {
+ struct object *obj;
+ int ret = -ENOENT;
++ unsigned long flags;
+
+- mutex_lock(&amp;cache_lock);
++ spin_lock_irqsave(&amp;cache_lock, flags);
+ obj = __cache_find(id);
+ if (obj) {
+ ret = 0;
+ strcpy(name, obj-&gt;name);
+ }
+- mutex_unlock(&amp;cache_lock);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ return ret;
+ }
+</programlisting>
+
+ <para>
+Note that the <function>spin_lock_irqsave</function> will turn off
+interrupts if they are on, otherwise does nothing (if we are already
+in an interrupt handler), hence these functions are safe to call from
+any context.
+ </para>
+ <para>
+Unfortunately, <function>cache_add</function> calls
+<function>kmalloc</function> with the <symbol>GFP_KERNEL</symbol>
+flag, which is only legal in user context. I have assumed that
+<function>cache_add</function> is still only called in user context,
+otherwise this should become a parameter to
+<function>cache_add</function>.
+ </para>
+ </sect1>
+ <sect1 id="examples-refcnt">
+ <title>Exposing Objects Outside This File</title>
+ <para>
+If our objects contained more information, it might not be sufficient
+to copy the information in and out: other parts of the code might want
+to keep pointers to these objects, for example, rather than looking up
+the id every time. This produces two problems.
+ </para>
+ <para>
+The first problem is that we use the <symbol>cache_lock</symbol> to
+protect objects: we'd need to make this non-static so the rest of the
+code can use it. This makes locking trickier, as it is no longer all
+in one place.
+ </para>
+ <para>
+The second problem is the lifetime problem: if another structure keeps
+a pointer to an object, it presumably expects that pointer to remain
+valid. Unfortunately, this is only guaranteed while you hold the
+lock, otherwise someone might call <function>cache_delete</function>
+and even worse, add another object, re-using the same address.
+ </para>
+ <para>
+As there is only one lock, you can't hold it forever: no-one else would
+get any work done.
+ </para>
+ <para>
+The solution to this problem is to use a reference count: everyone who
+has a pointer to the object increases it when they first get the
+object, and drops the reference count when they're finished with it.
+Whoever drops it to zero knows it is unused, and can actually delete it.
+ </para>
+ <para>
+Here is the code:
+ </para>
+
+<programlisting>
+--- cache.c.interrupt 2003-12-09 14:25:43.000000000 +1100
++++ cache.c.refcnt 2003-12-09 14:33:05.000000000 +1100
+@@ -7,6 +7,7 @@
+ struct object
+ {
+ struct list_head list;
++ unsigned int refcnt;
+ int id;
+ char name[32];
+ int popularity;
+@@ -17,6 +18,35 @@
+ static unsigned int cache_num = 0;
+ #define MAX_CACHE_SIZE 10
+
++static void __object_put(struct object *obj)
++{
++ if (--obj-&gt;refcnt == 0)
++ kfree(obj);
++}
++
++static void __object_get(struct object *obj)
++{
++ obj-&gt;refcnt++;
++}
++
++void object_put(struct object *obj)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&amp;cache_lock, flags);
++ __object_put(obj);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
++}
++
++void object_get(struct object *obj)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&amp;cache_lock, flags);
++ __object_get(obj);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
++}
++
+ /* Must be holding cache_lock */
+ static struct object *__cache_find(int id)
+ {
+@@ -35,6 +65,7 @@
+ {
+ BUG_ON(!obj);
+ list_del(&amp;obj-&gt;list);
++ __object_put(obj);
+ cache_num--;
+ }
+
+@@ -63,6 +94,7 @@
+ strlcpy(obj-&gt;name, name, sizeof(obj-&gt;name));
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
++ obj-&gt;refcnt = 1; /* The cache holds a reference */
+
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_add(obj);
+@@ -79,18 +111,15 @@
+ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ }
+
+-int cache_find(int id, char *name)
++struct object *cache_find(int id)
+ {
+ struct object *obj;
+- int ret = -ENOENT;
+ unsigned long flags;
+
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ obj = __cache_find(id);
+- if (obj) {
+- ret = 0;
+- strcpy(name, obj-&gt;name);
+- }
++ if (obj)
++ __object_get(obj);
+ spin_unlock_irqrestore(&amp;cache_lock, flags);
+- return ret;
++ return obj;
+ }
+</programlisting>
+
+<para>
+We encapsulate the reference counting in the standard 'get' and 'put'
+functions. Now we can return the object itself from
+<function>cache_find</function> which has the advantage that the user
+can now sleep holding the object (eg. to
+<function>copy_to_user</function> to name to userspace).
+</para>
+<para>
+The other point to note is that I said a reference should be held for
+every pointer to the object: thus the reference count is 1 when first
+inserted into the cache. In some versions the framework does not hold
+a reference count, but they are more complicated.
+</para>
+
+ <sect2 id="examples-refcnt-atomic">
+ <title>Using Atomic Operations For The Reference Count</title>
+<para>
+In practice, <type>atomic_t</type> would usually be used for
+<structfield>refcnt</structfield>. There are a number of atomic
+operations defined in
+
+<filename class="headerfile">include/asm/atomic.h</filename>: these are
+guaranteed to be seen atomically from all CPUs in the system, so no
+lock is required. In this case, it is simpler than using spinlocks,
+although for anything non-trivial using spinlocks is clearer. The
+<function>atomic_inc</function> and
+<function>atomic_dec_and_test</function> are used instead of the
+standard increment and decrement operators, and the lock is no longer
+used to protect the reference count itself.
+</para>
+
+<programlisting>
+--- cache.c.refcnt 2003-12-09 15:00:35.000000000 +1100
++++ cache.c.refcnt-atomic 2003-12-11 15:49:42.000000000 +1100
+@@ -7,7 +7,7 @@
+ struct object
+ {
+ struct list_head list;
+- unsigned int refcnt;
++ atomic_t refcnt;
+ int id;
+ char name[32];
+ int popularity;
+@@ -18,33 +18,15 @@
+ static unsigned int cache_num = 0;
+ #define MAX_CACHE_SIZE 10
+
+-static void __object_put(struct object *obj)
+-{
+- if (--obj-&gt;refcnt == 0)
+- kfree(obj);
+-}
+-
+-static void __object_get(struct object *obj)
+-{
+- obj-&gt;refcnt++;
+-}
+-
+ void object_put(struct object *obj)
+ {
+- unsigned long flags;
+-
+- spin_lock_irqsave(&amp;cache_lock, flags);
+- __object_put(obj);
+- spin_unlock_irqrestore(&amp;cache_lock, flags);
++ if (atomic_dec_and_test(&amp;obj-&gt;refcnt))
++ kfree(obj);
+ }
+
+ void object_get(struct object *obj)
+ {
+- unsigned long flags;
+-
+- spin_lock_irqsave(&amp;cache_lock, flags);
+- __object_get(obj);
+- spin_unlock_irqrestore(&amp;cache_lock, flags);
++ atomic_inc(&amp;obj-&gt;refcnt);
+ }
+
+ /* Must be holding cache_lock */
+@@ -65,7 +47,7 @@
+ {
+ BUG_ON(!obj);
+ list_del(&amp;obj-&gt;list);
+- __object_put(obj);
++ object_put(obj);
+ cache_num--;
+ }
+
+@@ -94,7 +76,7 @@
+ strlcpy(obj-&gt;name, name, sizeof(obj-&gt;name));
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
+- obj-&gt;refcnt = 1; /* The cache holds a reference */
++ atomic_set(&amp;obj-&gt;refcnt, 1); /* The cache holds a reference */
+
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_add(obj);
+@@ -119,7 +101,7 @@
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ obj = __cache_find(id);
+ if (obj)
+- __object_get(obj);
++ object_get(obj);
+ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ return obj;
+ }
+</programlisting>
+</sect2>
+</sect1>
+
+ <sect1 id="examples-lock-per-obj">
+ <title>Protecting The Objects Themselves</title>
+ <para>
+In these examples, we assumed that the objects (except the reference
+counts) never changed once they are created. If we wanted to allow
+the name to change, there are three possibilities:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+You can make <symbol>cache_lock</symbol> non-static, and tell people
+to grab that lock before changing the name in any object.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+You can provide a <function>cache_obj_rename</function> which grabs
+this lock and changes the name for the caller, and tell everyone to
+use that function.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+You can make the <symbol>cache_lock</symbol> protect only the cache
+itself, and use another lock to protect the name.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+Theoretically, you can make the locks as fine-grained as one lock for
+every field, for every object. In practice, the most common variants
+are:
+</para>
+ <itemizedlist>
+ <listitem>
+ <para>
+One lock which protects the infrastructure (the <symbol>cache</symbol>
+list in this example) and all the objects. This is what we have done
+so far.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+One lock which protects the infrastructure (including the list
+pointers inside the objects), and one lock inside the object which
+protects the rest of that object.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+Multiple locks to protect the infrastructure (eg. one lock per hash
+chain), possibly with a separate per-object lock.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+<para>
+Here is the "lock-per-object" implementation:
+</para>
+<programlisting>
+--- cache.c.refcnt-atomic 2003-12-11 15:50:54.000000000 +1100
++++ cache.c.perobjectlock 2003-12-11 17:15:03.000000000 +1100
+@@ -6,11 +6,17 @@
+
+ struct object
+ {
++ /* These two protected by cache_lock. */
+ struct list_head list;
++ int popularity;
++
+ atomic_t refcnt;
++
++ /* Doesn't change once created. */
+ int id;
++
++ spinlock_t lock; /* Protects the name */
+ char name[32];
+- int popularity;
+ };
+
+ static DEFINE_SPINLOCK(cache_lock);
+@@ -77,6 +84,7 @@
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
+ atomic_set(&amp;obj-&gt;refcnt, 1); /* The cache holds a reference */
++ spin_lock_init(&amp;obj-&gt;lock);
+
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_add(obj);
+</programlisting>
+
+<para>
+Note that I decide that the <structfield>popularity</structfield>
+count should be protected by the <symbol>cache_lock</symbol> rather
+than the per-object lock: this is because it (like the
+<structname>struct list_head</structname> inside the object) is
+logically part of the infrastructure. This way, I don't need to grab
+the lock of every object in <function>__cache_add</function> when
+seeking the least popular.
+</para>
+
+<para>
+I also decided that the <structfield>id</structfield> member is
+unchangeable, so I don't need to grab each object lock in
+<function>__cache_find()</function> to examine the
+<structfield>id</structfield>: the object lock is only used by a
+caller who wants to read or write the <structfield>name</structfield>
+field.
+</para>
+
+<para>
+Note also that I added a comment describing what data was protected by
+which locks. This is extremely important, as it describes the runtime
+behavior of the code, and can be hard to gain from just reading. And
+as Alan Cox says, <quote>Lock data, not code</quote>.
+</para>
+</sect1>
+</chapter>
+
+ <chapter id="common-problems">
+ <title>Common Problems</title>
+ <sect1 id="deadlock">
+ <title>Deadlock: Simple and Advanced</title>
+
+ <para>
+ There is a coding bug where a piece of code tries to grab a
+ spinlock twice: it will spin forever, waiting for the lock to
+ be released (spinlocks, rwlocks and mutexes are not
+ recursive in Linux). This is trivial to diagnose: not a
+ stay-up-five-nights-talk-to-fluffy-code-bunnies kind of
+ problem.
+ </para>
+
+ <para>
+ For a slightly more complex case, imagine you have a region
+ shared by a softirq and user context. If you use a
+ <function>spin_lock()</function> call to protect it, it is
+ possible that the user context will be interrupted by the softirq
+ while it holds the lock, and the softirq will then spin
+ forever trying to get the same lock.
+ </para>
+
+ <para>
+ Both of these are called deadlock, and as shown above, it can
+ occur even with a single CPU (although not on UP compiles,
+ since spinlocks vanish on kernel compiles with
+ <symbol>CONFIG_SMP</symbol>=n. You'll still get data corruption
+ in the second example).
+ </para>
+
+ <para>
+ This complete lockup is easy to diagnose: on SMP boxes the
+ watchdog timer or compiling with <symbol>DEBUG_SPINLOCK</symbol> set
+ (<filename>include/linux/spinlock.h</filename>) will show this up
+ immediately when it happens.
+ </para>
+
+ <para>
+ A more complex problem is the so-called 'deadly embrace',
+ involving two or more locks. Say you have a hash table: each
+ entry in the table is a spinlock, and a chain of hashed
+ objects. Inside a softirq handler, you sometimes want to
+ alter an object from one place in the hash to another: you
+ grab the spinlock of the old hash chain and the spinlock of
+ the new hash chain, and delete the object from the old one,
+ and insert it in the new one.
+ </para>
+
+ <para>
+ There are two problems here. First, if your code ever
+ tries to move the object to the same chain, it will deadlock
+ with itself as it tries to lock it twice. Secondly, if the
+ same softirq on another CPU is trying to move another object
+ in the reverse direction, the following could happen:
+ </para>
+
+ <table>
+ <title>Consequences</title>
+
+ <tgroup cols="2" align="left">
+
+ <thead>
+ <row>
+ <entry>CPU 1</entry>
+ <entry>CPU 2</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry>Grab lock A -&gt; OK</entry>
+ <entry>Grab lock B -&gt; OK</entry>
+ </row>
+ <row>
+ <entry>Grab lock B -&gt; spin</entry>
+ <entry>Grab lock A -&gt; spin</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+ The two CPUs will spin forever, waiting for the other to give up
+ their lock. It will look, smell, and feel like a crash.
+ </para>
+ </sect1>
+
+ <sect1 id="techs-deadlock-prevent">
+ <title>Preventing Deadlock</title>
+
+ <para>
+ Textbooks will tell you that if you always lock in the same
+ order, you will never get this kind of deadlock. Practice
+ will tell you that this approach doesn't scale: when I
+ create a new lock, I don't understand enough of the kernel
+ to figure out where in the 5000 lock hierarchy it will fit.
+ </para>
+
+ <para>
+ The best locks are encapsulated: they never get exposed in
+ headers, and are never held around calls to non-trivial
+ functions outside the same file. You can read through this
+ code and see that it will never deadlock, because it never
+ tries to grab another lock while it has that one. People
+ using your code don't even need to know you are using a
+ lock.
+ </para>
+
+ <para>
+ A classic problem here is when you provide callbacks or
+ hooks: if you call these with the lock held, you risk simple
+ deadlock, or a deadly embrace (who knows what the callback
+ will do?). Remember, the other programmers are out to get
+ you, so don't do this.
+ </para>
+
+ <sect2 id="techs-deadlock-overprevent">
+ <title>Overzealous Prevention Of Deadlocks</title>
+
+ <para>
+ Deadlocks are problematic, but not as bad as data
+ corruption. Code which grabs a read lock, searches a list,
+ fails to find what it wants, drops the read lock, grabs a
+ write lock and inserts the object has a race condition.
+ </para>
+
+ <para>
+ If you don't see why, please stay the fuck away from my code.
+ </para>
+ </sect2>
+ </sect1>
+
+ <sect1 id="racing-timers">
+ <title>Racing Timers: A Kernel Pastime</title>
+
+ <para>
+ Timers can produce their own special problems with races.
+ Consider a collection of objects (list, hash, etc) where each
+ object has a timer which is due to destroy it.
+ </para>
+
+ <para>
+ If you want to destroy the entire collection (say on module
+ removal), you might do the following:
+ </para>
+
+ <programlisting>
+ /* THIS CODE BAD BAD BAD BAD: IF IT WAS ANY WORSE IT WOULD USE
+ HUNGARIAN NOTATION */
+ spin_lock_bh(&amp;list_lock);
+
+ while (list) {
+ struct foo *next = list-&gt;next;
+ del_timer(&amp;list-&gt;timer);
+ kfree(list);
+ list = next;
+ }
+
+ spin_unlock_bh(&amp;list_lock);
+ </programlisting>
+
+ <para>
+ Sooner or later, this will crash on SMP, because a timer can
+ have just gone off before the <function>spin_lock_bh()</function>,
+ and it will only get the lock after we
+ <function>spin_unlock_bh()</function>, and then try to free
+ the element (which has already been freed!).
+ </para>
+
+ <para>
+ This can be avoided by checking the result of
+ <function>del_timer()</function>: if it returns
+ <returnvalue>1</returnvalue>, the timer has been deleted.
+ If <returnvalue>0</returnvalue>, it means (in this
+ case) that it is currently running, so we can do:
+ </para>
+
+ <programlisting>
+ retry:
+ spin_lock_bh(&amp;list_lock);
+
+ while (list) {
+ struct foo *next = list-&gt;next;
+ if (!del_timer(&amp;list-&gt;timer)) {
+ /* Give timer a chance to delete this */
+ spin_unlock_bh(&amp;list_lock);
+ goto retry;
+ }
+ kfree(list);
+ list = next;
+ }
+
+ spin_unlock_bh(&amp;list_lock);
+ </programlisting>
+
+ <para>
+ Another common problem is deleting timers which restart
+ themselves (by calling <function>add_timer()</function> at the end
+ of their timer function). Because this is a fairly common case
+ which is prone to races, you should use <function>del_timer_sync()</function>
+ (<filename class="headerfile">include/linux/timer.h</filename>)
+ to handle this case. It returns the number of times the timer
+ had to be deleted before we finally stopped it from adding itself back
+ in.
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="Efficiency">
+ <title>Locking Speed</title>
+
+ <para>
+There are three main things to worry about when considering speed of
+some code which does locking. First is concurrency: how many things
+are going to be waiting while someone else is holding a lock. Second
+is the time taken to actually acquire and release an uncontended lock.
+Third is using fewer, or smarter locks. I'm assuming that the lock is
+used fairly often: otherwise, you wouldn't be concerned about
+efficiency.
+</para>
+ <para>
+Concurrency depends on how long the lock is usually held: you should
+hold the lock for as long as needed, but no longer. In the cache
+example, we always create the object without the lock held, and then
+grab the lock only when we are ready to insert it in the list.
+</para>
+ <para>
+Acquisition times depend on how much damage the lock operations do to
+the pipeline (pipeline stalls) and how likely it is that this CPU was
+the last one to grab the lock (ie. is the lock cache-hot for this
+CPU): on a machine with more CPUs, this likelihood drops fast.
+Consider a 700MHz Intel Pentium III: an instruction takes about 0.7ns,
+an atomic increment takes about 58ns, a lock which is cache-hot on
+this CPU takes 160ns, and a cacheline transfer from another CPU takes
+an additional 170 to 360ns. (These figures from Paul McKenney's
+<ulink url="http://www.linuxjournal.com/article.php?sid=6993"> Linux
+Journal RCU article</ulink>).
+</para>
+ <para>
+These two aims conflict: holding a lock for a short time might be done
+by splitting locks into parts (such as in our final per-object-lock
+example), but this increases the number of lock acquisitions, and the
+results are often slower than having a single lock. This is another
+reason to advocate locking simplicity.
+</para>
+ <para>
+The third concern is addressed below: there are some methods to reduce
+the amount of locking which needs to be done.
+</para>
+
+ <sect1 id="efficiency-rwlocks">
+ <title>Read/Write Lock Variants</title>
+
+ <para>
+ Both spinlocks and mutexes have read/write variants:
+ <type>rwlock_t</type> and <structname>struct rw_semaphore</structname>.
+ These divide users into two classes: the readers and the writers. If
+ you are only reading the data, you can get a read lock, but to write to
+ the data you need the write lock. Many people can hold a read lock,
+ but a writer must be sole holder.
+ </para>
+
+ <para>
+ If your code divides neatly along reader/writer lines (as our
+ cache code does), and the lock is held by readers for
+ significant lengths of time, using these locks can help. They
+ are slightly slower than the normal locks though, so in practice
+ <type>rwlock_t</type> is not usually worthwhile.
+ </para>
+ </sect1>
+
+ <sect1 id="efficiency-read-copy-update">
+ <title>Avoiding Locks: Read Copy Update</title>
+
+ <para>
+ There is a special method of read/write locking called Read Copy
+ Update. Using RCU, the readers can avoid taking a lock
+ altogether: as we expect our cache to be read more often than
+ updated (otherwise the cache is a waste of time), it is a
+ candidate for this optimization.
+ </para>
+
+ <para>
+ How do we get rid of read locks? Getting rid of read locks
+ means that writers may be changing the list underneath the
+ readers. That is actually quite simple: we can read a linked
+ list while an element is being added if the writer adds the
+ element very carefully. For example, adding
+ <symbol>new</symbol> to a single linked list called
+ <symbol>list</symbol>:
+ </para>
+
+ <programlisting>
+ new-&gt;next = list-&gt;next;
+ wmb();
+ list-&gt;next = new;
+ </programlisting>
+
+ <para>
+ The <function>wmb()</function> is a write memory barrier. It
+ ensures that the first operation (setting the new element's
+ <symbol>next</symbol> pointer) is complete and will be seen by
+ all CPUs, before the second operation is (putting the new
+ element into the list). This is important, since modern
+ compilers and modern CPUs can both reorder instructions unless
+ told otherwise: we want a reader to either not see the new
+ element at all, or see the new element with the
+ <symbol>next</symbol> pointer correctly pointing at the rest of
+ the list.
+ </para>
+ <para>
+ Fortunately, there is a function to do this for standard
+ <structname>struct list_head</structname> lists:
+ <function>list_add_rcu()</function>
+ (<filename>include/linux/list.h</filename>).
+ </para>
+ <para>
+ Removing an element from the list is even simpler: we replace
+ the pointer to the old element with a pointer to its successor,
+ and readers will either see it, or skip over it.
+ </para>
+ <programlisting>
+ list-&gt;next = old-&gt;next;
+ </programlisting>
+ <para>
+ There is <function>list_del_rcu()</function>
+ (<filename>include/linux/list.h</filename>) which does this (the
+ normal version poisons the old object, which we don't want).
+ </para>
+ <para>
+ The reader must also be careful: some CPUs can look through the
+ <symbol>next</symbol> pointer to start reading the contents of
+ the next element early, but don't realize that the pre-fetched
+ contents is wrong when the <symbol>next</symbol> pointer changes
+ underneath them. Once again, there is a
+ <function>list_for_each_entry_rcu()</function>
+ (<filename>include/linux/list.h</filename>) to help you. Of
+ course, writers can just use
+ <function>list_for_each_entry()</function>, since there cannot
+ be two simultaneous writers.
+ </para>
+ <para>
+ Our final dilemma is this: when can we actually destroy the
+ removed element? Remember, a reader might be stepping through
+ this element in the list right now: if we free this element and
+ the <symbol>next</symbol> pointer changes, the reader will jump
+ off into garbage and crash. We need to wait until we know that
+ all the readers who were traversing the list when we deleted the
+ element are finished. We use <function>call_rcu()</function> to
+ register a callback which will actually destroy the object once
+ all pre-existing readers are finished. Alternatively,
+ <function>synchronize_rcu()</function> may be used to block until
+ all pre-existing are finished.
+ </para>
+ <para>
+ But how does Read Copy Update know when the readers are
+ finished? The method is this: firstly, the readers always
+ traverse the list inside
+ <function>rcu_read_lock()</function>/<function>rcu_read_unlock()</function>
+ pairs: these simply disable preemption so the reader won't go to
+ sleep while reading the list.
+ </para>
+ <para>
+ RCU then waits until every other CPU has slept at least once:
+ since readers cannot sleep, we know that any readers which were
+ traversing the list during the deletion are finished, and the
+ callback is triggered. The real Read Copy Update code is a
+ little more optimized than this, but this is the fundamental
+ idea.
+ </para>
+
+<programlisting>
+--- cache.c.perobjectlock 2003-12-11 17:15:03.000000000 +1100
++++ cache.c.rcupdate 2003-12-11 17:55:14.000000000 +1100
+@@ -1,15 +1,18 @@
+ #include &lt;linux/list.h&gt;
+ #include &lt;linux/slab.h&gt;
+ #include &lt;linux/string.h&gt;
++#include &lt;linux/rcupdate.h&gt;
+ #include &lt;linux/mutex.h&gt;
+ #include &lt;asm/errno.h&gt;
+
+ struct object
+ {
+- /* These two protected by cache_lock. */
++ /* This is protected by RCU */
+ struct list_head list;
+ int popularity;
+
++ struct rcu_head rcu;
++
+ atomic_t refcnt;
+
+ /* Doesn't change once created. */
+@@ -40,7 +43,7 @@
+ {
+ struct object *i;
+
+- list_for_each_entry(i, &amp;cache, list) {
++ list_for_each_entry_rcu(i, &amp;cache, list) {
+ if (i-&gt;id == id) {
+ i-&gt;popularity++;
+ return i;
+@@ -49,19 +52,25 @@
+ return NULL;
+ }
+
++/* Final discard done once we know no readers are looking. */
++static void cache_delete_rcu(void *arg)
++{
++ object_put(arg);
++}
++
+ /* Must be holding cache_lock */
+ static void __cache_delete(struct object *obj)
+ {
+ BUG_ON(!obj);
+- list_del(&amp;obj-&gt;list);
+- object_put(obj);
++ list_del_rcu(&amp;obj-&gt;list);
+ cache_num--;
++ call_rcu(&amp;obj-&gt;rcu, cache_delete_rcu);
+ }
+
+ /* Must be holding cache_lock */
+ static void __cache_add(struct object *obj)
+ {
+- list_add(&amp;obj-&gt;list, &amp;cache);
++ list_add_rcu(&amp;obj-&gt;list, &amp;cache);
+ if (++cache_num > MAX_CACHE_SIZE) {
+ struct object *i, *outcast = NULL;
+ list_for_each_entry(i, &amp;cache, list) {
+@@ -104,12 +114,11 @@
+ struct object *cache_find(int id)
+ {
+ struct object *obj;
+- unsigned long flags;
+
+- spin_lock_irqsave(&amp;cache_lock, flags);
++ rcu_read_lock();
+ obj = __cache_find(id);
+ if (obj)
+ object_get(obj);
+- spin_unlock_irqrestore(&amp;cache_lock, flags);
++ rcu_read_unlock();
+ return obj;
+ }
+</programlisting>
+
+<para>
+Note that the reader will alter the
+<structfield>popularity</structfield> member in
+<function>__cache_find()</function>, and now it doesn't hold a lock.
+One solution would be to make it an <type>atomic_t</type>, but for
+this usage, we don't really care about races: an approximate result is
+good enough, so I didn't change it.
+</para>
+
+<para>
+The result is that <function>cache_find()</function> requires no
+synchronization with any other functions, so is almost as fast on SMP
+as it would be on UP.
+</para>
+
+<para>
+There is a furthur optimization possible here: remember our original
+cache code, where there were no reference counts and the caller simply
+held the lock whenever using the object? This is still possible: if
+you hold the lock, no one can delete the object, so you don't need to
+get and put the reference count.
+</para>
+
+<para>
+Now, because the 'read lock' in RCU is simply disabling preemption, a
+caller which always has preemption disabled between calling
+<function>cache_find()</function> and
+<function>object_put()</function> does not need to actually get and
+put the reference count: we could expose
+<function>__cache_find()</function> by making it non-static, and
+such callers could simply call that.
+</para>
+<para>
+The benefit here is that the reference count is not written to: the
+object is not altered in any way, which is much faster on SMP
+machines due to caching.
+</para>
+ </sect1>
+
+ <sect1 id="per-cpu">
+ <title>Per-CPU Data</title>
+
+ <para>
+ Another technique for avoiding locking which is used fairly
+ widely is to duplicate information for each CPU. For example,
+ if you wanted to keep a count of a common condition, you could
+ use a spin lock and a single counter. Nice and simple.
+ </para>
+
+ <para>
+ If that was too slow (it's usually not, but if you've got a
+ really big machine to test on and can show that it is), you
+ could instead use a counter for each CPU, then none of them need
+ an exclusive lock. See <function>DEFINE_PER_CPU()</function>,
+ <function>get_cpu_var()</function> and
+ <function>put_cpu_var()</function>
+ (<filename class="headerfile">include/linux/percpu.h</filename>).
+ </para>
+
+ <para>
+ Of particular use for simple per-cpu counters is the
+ <type>local_t</type> type, and the
+ <function>cpu_local_inc()</function> and related functions,
+ which are more efficient than simple code on some architectures
+ (<filename class="headerfile">include/asm/local.h</filename>).
+ </para>
+
+ <para>
+ Note that there is no simple, reliable way of getting an exact
+ value of such a counter, without introducing more locks. This
+ is not a problem for some uses.
+ </para>
+ </sect1>
+
+ <sect1 id="mostly-hardirq">
+ <title>Data Which Mostly Used By An IRQ Handler</title>
+
+ <para>
+ If data is always accessed from within the same IRQ handler, you
+ don't need a lock at all: the kernel already guarantees that the
+ irq handler will not run simultaneously on multiple CPUs.
+ </para>
+ <para>
+ Manfred Spraul points out that you can still do this, even if
+ the data is very occasionally accessed in user context or
+ softirqs/tasklets. The irq handler doesn't use a lock, and
+ all other accesses are done as so:
+ </para>
+
+<programlisting>
+ spin_lock(&amp;lock);
+ disable_irq(irq);
+ ...
+ enable_irq(irq);
+ spin_unlock(&amp;lock);
+</programlisting>
+ <para>
+ The <function>disable_irq()</function> prevents the irq handler
+ from running (and waits for it to finish if it's currently
+ running on other CPUs). The spinlock prevents any other
+ accesses happening at the same time. Naturally, this is slower
+ than just a <function>spin_lock_irq()</function> call, so it
+ only makes sense if this type of access happens extremely
+ rarely.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="sleeping-things">
+ <title>What Functions Are Safe To Call From Interrupts?</title>
+
+ <para>
+ Many functions in the kernel sleep (ie. call schedule())
+ directly or indirectly: you can never call them while holding a
+ spinlock, or with preemption disabled. This also means you need
+ to be in user context: calling them from an interrupt is illegal.
+ </para>
+
+ <sect1 id="sleeping">
+ <title>Some Functions Which Sleep</title>
+
+ <para>
+ The most common ones are listed below, but you usually have to
+ read the code to find out if other calls are safe. If everyone
+ else who calls it can sleep, you probably need to be able to
+ sleep, too. In particular, registration and deregistration
+ functions usually expect to be called from user context, and can
+ sleep.
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ Accesses to
+ <firstterm linkend="gloss-userspace">userspace</firstterm>:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ <function>copy_from_user()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>copy_to_user()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>get_user()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>put_user()</function>
+ </para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <para>
+ <function>kmalloc(GFP_KERNEL)</function>
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <function>mutex_lock_interruptible()</function> and
+ <function>mutex_lock()</function>
+ </para>
+ <para>
+ There is a <function>mutex_trylock()</function> which does not
+ sleep. Still, it must not be used inside interrupt context since
+ its implementation is not safe for that.
+ <function>mutex_unlock()</function> will also never sleep.
+ It cannot be used in interrupt context either since a mutex
+ must be released by the same task that acquired it.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect1>
+
+ <sect1 id="dont-sleep">
+ <title>Some Functions Which Don't Sleep</title>
+
+ <para>
+ Some functions are safe to call from any context, or holding
+ almost any lock.
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ <function>printk()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>kfree()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>add_timer()</function> and <function>del_timer()</function>
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect1>
+ </chapter>
+
+ <chapter id="apiref">
+ <title>Mutex API reference</title>
+!Iinclude/linux/mutex.h
+!Ekernel/mutex.c
+ </chapter>
+
+ <chapter id="references">
+ <title>Further reading</title>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ <filename>Documentation/spinlocks.txt</filename>:
+ Linus Torvalds' spinlocking tutorial in the kernel sources.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Unix Systems for Modern Architectures: Symmetric
+ Multiprocessing and Caching for Kernel Programmers:
+ </para>
+
+ <para>
+ Curt Schimmel's very good introduction to kernel level
+ locking (not written for Linux, but nearly everything
+ applies). The book is expensive, but really worth every
+ penny to understand SMP locking. [ISBN: 0201633388]
+ </para>
+ </listitem>
+ </itemizedlist>
+ </chapter>
+
+ <chapter id="thanks">
+ <title>Thanks</title>
+
+ <para>
+ Thanks to Telsa Gwynne for DocBooking, neatening and adding
+ style.
+ </para>
+
+ <para>
+ Thanks to Martin Pool, Philipp Rumpf, Stephen Rothwell, Paul
+ Mackerras, Ruedi Aschwanden, Alan Cox, Manfred Spraul, Tim
+ Waugh, Pete Zaitcev, James Morris, Robert Love, Paul McKenney,
+ John Ashby for proofreading, correcting, flaming, commenting.
+ </para>
+
+ <para>
+ Thanks to the cabal for having no influence on this document.
+ </para>
+ </chapter>
+
+ <glossary id="glossary">
+ <title>Glossary</title>
+
+ <glossentry id="gloss-preemption">
+ <glossterm>preemption</glossterm>
+ <glossdef>
+ <para>
+ Prior to 2.5, or when <symbol>CONFIG_PREEMPT</symbol> is
+ unset, processes in user context inside the kernel would not
+ preempt each other (ie. you had that CPU until you gave it up,
+ except for interrupts). With the addition of
+ <symbol>CONFIG_PREEMPT</symbol> in 2.5.4, this changed: when
+ in user context, higher priority tasks can "cut in": spinlocks
+ were changed to disable preemption, even on UP.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-bh">
+ <glossterm>bh</glossterm>
+ <glossdef>
+ <para>
+ Bottom Half: for historical reasons, functions with
+ '_bh' in them often now refer to any software interrupt, e.g.
+ <function>spin_lock_bh()</function> blocks any software interrupt
+ on the current CPU. Bottom halves are deprecated, and will
+ eventually be replaced by tasklets. Only one bottom half will be
+ running at any time.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-hwinterrupt">
+ <glossterm>Hardware Interrupt / Hardware IRQ</glossterm>
+ <glossdef>
+ <para>
+ Hardware interrupt request. <function>in_irq()</function> returns
+ <returnvalue>true</returnvalue> in a hardware interrupt handler.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-interruptcontext">
+ <glossterm>Interrupt Context</glossterm>
+ <glossdef>
+ <para>
+ Not user context: processing a hardware irq or software irq.
+ Indicated by the <function>in_interrupt()</function> macro
+ returning <returnvalue>true</returnvalue>.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-smp">
+ <glossterm><acronym>SMP</acronym></glossterm>
+ <glossdef>
+ <para>
+ Symmetric Multi-Processor: kernels compiled for multiple-CPU
+ machines. (CONFIG_SMP=y).
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-softirq">
+ <glossterm>Software Interrupt / softirq</glossterm>
+ <glossdef>
+ <para>
+ Software interrupt handler. <function>in_irq()</function> returns
+ <returnvalue>false</returnvalue>; <function>in_softirq()</function>
+ returns <returnvalue>true</returnvalue>. Tasklets and softirqs
+ both fall into the category of 'software interrupts'.
+ </para>
+ <para>
+ Strictly speaking a softirq is one of up to 32 enumerated software
+ interrupts which can run on multiple CPUs at once.
+ Sometimes used to refer to tasklets as
+ well (ie. all software interrupts).
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-tasklet">
+ <glossterm>tasklet</glossterm>
+ <glossdef>
+ <para>
+ A dynamically-registrable software interrupt,
+ which is guaranteed to only run on one CPU at a time.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-timers">
+ <glossterm>timer</glossterm>
+ <glossdef>
+ <para>
+ A dynamically-registrable software interrupt, which is run at
+ (or close to) a given time. When running, it is just like a
+ tasklet (in fact, they are called from the TIMER_SOFTIRQ).
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-up">
+ <glossterm><acronym>UP</acronym></glossterm>
+ <glossdef>
+ <para>
+ Uni-Processor: Non-SMP. (CONFIG_SMP=n).
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-usercontext">
+ <glossterm>User Context</glossterm>
+ <glossdef>
+ <para>
+ The kernel executing on behalf of a particular process (ie. a
+ system call or trap) or kernel thread. You can tell which
+ process with the <symbol>current</symbol> macro.) Not to
+ be confused with userspace. Can be interrupted by software or
+ hardware interrupts.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-userspace">
+ <glossterm>Userspace</glossterm>
+ <glossdef>
+ <para>
+ A process executing its own code outside the kernel.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ </glossary>
+</book>
+
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
new file mode 100644
index 00000000..f77358f9
--- /dev/null
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -0,0 +1,917 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="kgdbOnLinux">
+ <bookinfo>
+ <title>Using kgdb, kdb and the kernel debugger internals</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Jason</firstname>
+ <surname>Wessel</surname>
+ <affiliation>
+ <address>
+ <email>jason.wessel@windriver.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+ <copyright>
+ <year>2008,2010</year>
+ <holder>Wind River Systems, Inc.</holder>
+ </copyright>
+ <copyright>
+ <year>2004-2005</year>
+ <holder>MontaVista Software, Inc.</holder>
+ </copyright>
+ <copyright>
+ <year>2004</year>
+ <holder>Amit S. Kale</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This file is licensed under the terms of the GNU General Public License
+ version 2. This program is licensed "as is" without any warranty of any
+ kind, whether express or implied.
+ </para>
+
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+ <chapter id="Introduction">
+ <title>Introduction</title>
+ <para>
+ The kernel has two different debugger front ends (kdb and kgdb)
+ which interface to the debug core. It is possible to use either
+ of the debugger front ends and dynamically transition between them
+ if you configure the kernel properly at compile and runtime.
+ </para>
+ <para>
+ Kdb is simplistic shell-style interface which you can use on a
+ system console with a keyboard or serial console. You can use it
+ to inspect memory, registers, process lists, dmesg, and even set
+ breakpoints to stop in a certain location. Kdb is not a source
+ level debugger, although you can set breakpoints and execute some
+ basic kernel run control. Kdb is mainly aimed at doing some
+ analysis to aid in development or diagnosing kernel problems. You
+ can access some symbols by name in kernel built-ins or in kernel
+ modules if the code was built
+ with <symbol>CONFIG_KALLSYMS</symbol>.
+ </para>
+ <para>
+ Kgdb is intended to be used as a source level debugger for the
+ Linux kernel. It is used along with gdb to debug a Linux kernel.
+ The expectation is that gdb can be used to "break in" to the
+ kernel to inspect memory, variables and look through call stack
+ information similar to the way an application developer would use
+ gdb to debug an application. It is possible to place breakpoints
+ in kernel code and perform some limited execution stepping.
+ </para>
+ <para>
+ Two machines are required for using kgdb. One of these machines is
+ a development machine and the other is the target machine. The
+ kernel to be debugged runs on the target machine. The development
+ machine runs an instance of gdb against the vmlinux file which
+ contains the symbols (not boot image such as bzImage, zImage,
+ uImage...). In gdb the developer specifies the connection
+ parameters and connects to kgdb. The type of connection a
+ developer makes with gdb depends on the availability of kgdb I/O
+ modules compiled as built-ins or loadable kernel modules in the test
+ machine's kernel.
+ </para>
+ </chapter>
+ <chapter id="CompilingAKernel">
+ <title>Compiling a kernel</title>
+ <para>
+ <itemizedlist>
+ <listitem><para>In order to enable compilation of kdb, you must first enable kgdb.</para></listitem>
+ <listitem><para>The kgdb test compile options are described in the kgdb test suite chapter.</para></listitem>
+ </itemizedlist>
+ </para>
+ <sect1 id="CompileKGDB">
+ <title>Kernel config options for kgdb</title>
+ <para>
+ To enable <symbol>CONFIG_KGDB</symbol> you should look under
+ "Kernel debugging" and select "KGDB: kernel debugger".
+ </para>
+ <para>
+ While it is not a hard requirement that you have symbols in your
+ vmlinux file, gdb tends not to be very useful without the symbolic
+ data, so you will want to turn
+ on <symbol>CONFIG_DEBUG_INFO</symbol> which is called "Compile the
+ kernel with debug info" in the config menu.
+ </para>
+ <para>
+ It is advised, but not required that you turn on the
+ <symbol>CONFIG_FRAME_POINTER</symbol> kernel option which is called "Compile the
+ kernel with frame pointers" in the config menu. This option
+ inserts code to into the compiled executable which saves the frame
+ information in registers or on the stack at different points which
+ allows a debugger such as gdb to more accurately construct
+ stack back traces while debugging the kernel.
+ </para>
+ <para>
+ If the architecture that you are using supports the kernel option
+ CONFIG_DEBUG_RODATA, you should consider turning it off. This
+ option will prevent the use of software breakpoints because it
+ marks certain regions of the kernel's memory space as read-only.
+ If kgdb supports it for the architecture you are using, you can
+ use hardware breakpoints if you desire to run with the
+ CONFIG_DEBUG_RODATA option turned on, else you need to turn off
+ this option.
+ </para>
+ <para>
+ Next you should choose one of more I/O drivers to interconnect
+ debugging host and debugged target. Early boot debugging requires
+ a KGDB I/O driver that supports early debugging and the driver
+ must be built into the kernel directly. Kgdb I/O driver
+ configuration takes place via kernel or module parameters which
+ you can learn more about in the in the section that describes the
+ parameter "kgdboc".
+ </para>
+ <para>Here is an example set of .config symbols to enable or
+ disable for kgdb:
+ <itemizedlist>
+ <listitem><para># CONFIG_DEBUG_RODATA is not set</para></listitem>
+ <listitem><para>CONFIG_FRAME_POINTER=y</para></listitem>
+ <listitem><para>CONFIG_KGDB=y</para></listitem>
+ <listitem><para>CONFIG_KGDB_SERIAL_CONSOLE=y</para></listitem>
+ </itemizedlist>
+ </para>
+ </sect1>
+ <sect1 id="CompileKDB">
+ <title>Kernel config options for kdb</title>
+ <para>Kdb is quite a bit more complex than the simple gdbstub
+ sitting on top of the kernel's debug core. Kdb must implement a
+ shell, and also adds some helper functions in other parts of the
+ kernel, responsible for printing out interesting data such as what
+ you would see if you ran "lsmod", or "ps". In order to build kdb
+ into the kernel you follow the same steps as you would for kgdb.
+ </para>
+ <para>The main config option for kdb
+ is <symbol>CONFIG_KGDB_KDB</symbol> which is called "KGDB_KDB:
+ include kdb frontend for kgdb" in the config menu. In theory you
+ would have already also selected an I/O driver such as the
+ CONFIG_KGDB_SERIAL_CONSOLE interface if you plan on using kdb on a
+ serial port, when you were configuring kgdb.
+ </para>
+ <para>If you want to use a PS/2-style keyboard with kdb, you would
+ select CONFIG_KDB_KEYBOARD which is called "KGDB_KDB: keyboard as
+ input device" in the config menu. The CONFIG_KDB_KEYBOARD option
+ is not used for anything in the gdb interface to kgdb. The
+ CONFIG_KDB_KEYBOARD option only works with kdb.
+ </para>
+ <para>Here is an example set of .config symbols to enable/disable kdb:
+ <itemizedlist>
+ <listitem><para># CONFIG_DEBUG_RODATA is not set</para></listitem>
+ <listitem><para>CONFIG_FRAME_POINTER=y</para></listitem>
+ <listitem><para>CONFIG_KGDB=y</para></listitem>
+ <listitem><para>CONFIG_KGDB_SERIAL_CONSOLE=y</para></listitem>
+ <listitem><para>CONFIG_KGDB_KDB=y</para></listitem>
+ <listitem><para>CONFIG_KDB_KEYBOARD=y</para></listitem>
+ </itemizedlist>
+ </para>
+ </sect1>
+ </chapter>
+ <chapter id="kgdbKernelArgs">
+ <title>Kernel Debugger Boot Arguments</title>
+ <para>This section describes the various runtime kernel
+ parameters that affect the configuration of the kernel debugger.
+ The following chapter covers using kdb and kgdb as well as
+ provides some examples of the configuration parameters.</para>
+ <sect1 id="kgdboc">
+ <title>Kernel parameter: kgdboc</title>
+ <para>The kgdboc driver was originally an abbreviation meant to
+ stand for "kgdb over console". Today it is the primary mechanism
+ to configure how to communicate from gdb to kgdb as well as the
+ devices you want to use to interact with the kdb shell.
+ </para>
+ <para>For kgdb/gdb, kgdboc is designed to work with a single serial
+ port. It is intended to cover the circumstance where you want to
+ use a serial console as your primary console as well as using it to
+ perform kernel debugging. It is also possible to use kgdb on a
+ serial port which is not designated as a system console. Kgdboc
+ may be configured as a kernel built-in or a kernel loadable module.
+ You can only make use of <constant>kgdbwait</constant> and early
+ debugging if you build kgdboc into the kernel as a built-in.
+ <para>Optionally you can elect to activate kms (Kernel Mode
+ Setting) integration. When you use kms with kgdboc and you have a
+ video driver that has atomic mode setting hooks, it is possible to
+ enter the debugger on the graphics console. When the kernel
+ execution is resumed, the previous graphics mode will be restored.
+ This integration can serve as a useful tool to aid in diagnosing
+ crashes or doing analysis of memory with kdb while allowing the
+ full graphics console applications to run.
+ </para>
+ </para>
+ <sect2 id="kgdbocArgs">
+ <title>kgdboc arguments</title>
+ <para>Usage: <constant>kgdboc=[kms][[,]kbd][[,]serial_device][,baud]</constant></para>
+ <para>The order listed above must be observed if you use any of the
+ optional configurations together.
+ </para>
+ <para>Abbreviations:
+ <itemizedlist>
+ <listitem><para>kms = Kernel Mode Setting</para></listitem>
+ <listitem><para>kbd = Keyboard</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>You can configure kgdboc to use the keyboard, and or a serial
+ device depending on if you are using kdb and or kgdb, in one of the
+ following scenarios. The order listed above must be observed if
+ you use any of the optional configurations together. Using kms +
+ only gdb is generally not a useful combination.</para>
+ <sect3 id="kgdbocArgs1">
+ <title>Using loadable module or built-in</title>
+ <para>
+ <orderedlist>
+ <listitem><para>As a kernel built-in:</para>
+ <para>Use the kernel boot argument: <constant>kgdboc=&lt;tty-device&gt;,[baud]</constant></para></listitem>
+ <listitem>
+ <para>As a kernel loadable module:</para>
+ <para>Use the command: <constant>modprobe kgdboc kgdboc=&lt;tty-device&gt;,[baud]</constant></para>
+ <para>Here are two examples of how you might format the kgdboc
+ string. The first is for an x86 target using the first serial port.
+ The second example is for the ARM Versatile AB using the second
+ serial port.
+ <orderedlist>
+ <listitem><para><constant>kgdboc=ttyS0,115200</constant></para></listitem>
+ <listitem><para><constant>kgdboc=ttyAMA1,115200</constant></para></listitem>
+ </orderedlist>
+ </para>
+ </listitem>
+ </orderedlist></para>
+ </sect3>
+ <sect3 id="kgdbocArgs2">
+ <title>Configure kgdboc at runtime with sysfs</title>
+ <para>At run time you can enable or disable kgdboc by echoing a
+ parameters into the sysfs. Here are two examples:</para>
+ <orderedlist>
+ <listitem><para>Enable kgdboc on ttyS0</para>
+ <para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ <listitem><para>Disable kgdboc</para>
+ <para><constant>echo "" &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ </orderedlist>
+ <para>NOTE: You do not need to specify the baud if you are
+ configuring the console on tty which is already configured or
+ open.</para>
+ </sect3>
+ <sect3 id="kgdbocArgs3">
+ <title>More examples</title>
+ <para>You can configure kgdboc to use the keyboard, and or a serial
+ device depending on if you are using kdb and or kgdb, in one of the
+ following scenarios.</para>
+ <para>You can configure kgdboc to use the keyboard, and or a serial device
+ depending on if you are using kdb and or kgdb, in one of the
+ following scenarios.
+ <orderedlist>
+ <listitem><para>kdb and kgdb over only a serial port</para>
+ <para><constant>kgdboc=&lt;serial_device&gt;[,baud]</constant></para>
+ <para>Example: <constant>kgdboc=ttyS0,115200</constant></para>
+ </listitem>
+ <listitem><para>kdb and kgdb with keyboard and a serial port</para>
+ <para><constant>kgdboc=kbd,&lt;serial_device&gt;[,baud]</constant></para>
+ <para>Example: <constant>kgdboc=kbd,ttyS0,115200</constant></para>
+ </listitem>
+ <listitem><para>kdb with a keyboard</para>
+ <para><constant>kgdboc=kbd</constant></para>
+ </listitem>
+ <listitem><para>kdb with kernel mode setting</para>
+ <para><constant>kgdboc=kms,kbd</constant></para>
+ </listitem>
+ <listitem><para>kdb with kernel mode setting and kgdb over a serial port</para>
+ <para><constant>kgdboc=kms,kbd,ttyS0,115200</constant></para>
+ </listitem>
+ </orderedlist>
+ </para>
+ </sect3>
+ <para>NOTE: Kgdboc does not support interrupting the target via the
+ gdb remote protocol. You must manually send a sysrq-g unless you
+ have a proxy that splits console output to a terminal program.
+ A console proxy has a separate TCP port for the debugger and a separate
+ TCP port for the "human" console. The proxy can take care of sending
+ the sysrq-g for you.
+ </para>
+ <para>When using kgdboc with no debugger proxy, you can end up
+ connecting the debugger at one of two entry points. If an
+ exception occurs after you have loaded kgdboc, a message should
+ print on the console stating it is waiting for the debugger. In
+ this case you disconnect your terminal program and then connect the
+ debugger in its place. If you want to interrupt the target system
+ and forcibly enter a debug session you have to issue a Sysrq
+ sequence and then type the letter <constant>g</constant>. Then
+ you disconnect the terminal session and connect gdb. Your options
+ if you don't like this are to hack gdb to send the sysrq-g for you
+ as well as on the initial connect, or to use a debugger proxy that
+ allows an unmodified gdb to do the debugging.
+ </para>
+ </sect2>
+ </sect1>
+ <sect1 id="kgdbwait">
+ <title>Kernel parameter: kgdbwait</title>
+ <para>
+ The Kernel command line option <constant>kgdbwait</constant> makes
+ kgdb wait for a debugger connection during booting of a kernel. You
+ can only use this option you compiled a kgdb I/O driver into the
+ kernel and you specified the I/O driver configuration as a kernel
+ command line option. The kgdbwait parameter should always follow the
+ configuration parameter for the kgdb I/O driver in the kernel
+ command line else the I/O driver will not be configured prior to
+ asking the kernel to use it to wait.
+ </para>
+ <para>
+ The kernel will stop and wait as early as the I/O driver and
+ architecture allows when you use this option. If you build the
+ kgdb I/O driver as a loadable kernel module kgdbwait will not do
+ anything.
+ </para>
+ </sect1>
+ <sect1 id="kgdbcon">
+ <title>Kernel parameter: kgdbcon</title>
+ <para> The kgdbcon feature allows you to see printk() messages
+ inside gdb while gdb is connected to the kernel. Kdb does not make
+ use of the kgdbcon feature.
+ </para>
+ <para>Kgdb supports using the gdb serial protocol to send console
+ messages to the debugger when the debugger is connected and running.
+ There are two ways to activate this feature.
+ <orderedlist>
+ <listitem><para>Activate with the kernel command line option:</para>
+ <para><constant>kgdbcon</constant></para>
+ </listitem>
+ <listitem><para>Use sysfs before configuring an I/O driver</para>
+ <para>
+ <constant>echo 1 &gt; /sys/module/kgdb/parameters/kgdb_use_con</constant>
+ </para>
+ <para>
+ NOTE: If you do this after you configure the kgdb I/O driver, the
+ setting will not take effect until the next point the I/O is
+ reconfigured.
+ </para>
+ </listitem>
+ </orderedlist>
+ <para>IMPORTANT NOTE: You cannot use kgdboc + kgdbcon on a tty that is an
+ active system console. An example incorrect usage is <constant>console=ttyS0,115200 kgdboc=ttyS0 kgdbcon</constant>
+ </para>
+ <para>It is possible to use this option with kgdboc on a tty that is not a system console.
+ </para>
+ </para>
+ </sect1>
+ <sect1 id="kgdbreboot">
+ <title>Run time parameter: kgdbreboot</title>
+ <para> The kgdbreboot feature allows you to change how the debugger
+ deals with the reboot notification. You have 3 choices for the
+ behavior. The default behavior is always set to 0.</para>
+ <orderedlist>
+ <listitem><para>echo -1 > /sys/module/debug_core/parameters/kgdbreboot</para>
+ <para>Ignore the reboot notification entirely.</para>
+ </listitem>
+ <listitem><para>echo 0 > /sys/module/debug_core/parameters/kgdbreboot</para>
+ <para>Send the detach message to any attached debugger client.</para>
+ </listitem>
+ <listitem><para>echo 1 > /sys/module/debug_core/parameters/kgdbreboot</para>
+ <para>Enter the debugger on reboot notify.</para>
+ </listitem>
+ </orderedlist>
+ </sect1>
+ </chapter>
+ <chapter id="usingKDB">
+ <title>Using kdb</title>
+ <para>
+ </para>
+ <sect1 id="quickKDBserial">
+ <title>Quick start for kdb on a serial port</title>
+ <para>This is a quick example of how to use kdb.</para>
+ <para><orderedlist>
+ <listitem><para>Boot kernel with arguments:
+ <itemizedlist>
+ <listitem><para><constant>console=ttyS0,115200 kgdboc=ttyS0,115200</constant></para></listitem>
+ </itemizedlist></para>
+ <para>OR</para>
+ <para>Configure kgdboc after the kernel booted; assuming you are using a serial port console:
+ <itemizedlist>
+ <listitem><para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem><para>Enter the kernel debugger manually or by waiting for an oops or fault. There are several ways you can enter the kernel debugger manually; all involve using the sysrq-g, which means you must have enabled CONFIG_MAGIC_SYSRQ=y in your kernel config.</para>
+ <itemizedlist>
+ <listitem><para>When logged in as root or with a super user session you can run:</para>
+ <para><constant>echo g &gt; /proc/sysrq-trigger</constant></para></listitem>
+ <listitem><para>Example using minicom 2.2</para>
+ <para>Press: <constant>Control-a</constant></para>
+ <para>Press: <constant>f</constant></para>
+ <para>Press: <constant>g</constant></para>
+ </listitem>
+ <listitem><para>When you have telneted to a terminal server that supports sending a remote break</para>
+ <para>Press: <constant>Control-]</constant></para>
+ <para>Type in:<constant>send break</constant></para>
+ <para>Press: <constant>Enter</constant></para>
+ <para>Press: <constant>g</constant></para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+ <listitem><para>From the kdb prompt you can run the "help" command to see a complete list of the commands that are available.</para>
+ <para>Some useful commands in kdb include:
+ <itemizedlist>
+ <listitem><para>lsmod -- Shows where kernel modules are loaded</para></listitem>
+ <listitem><para>ps -- Displays only the active processes</para></listitem>
+ <listitem><para>ps A -- Shows all the processes</para></listitem>
+ <listitem><para>summary -- Shows kernel version info and memory usage</para></listitem>
+ <listitem><para>bt -- Get a backtrace of the current process using dump_stack()</para></listitem>
+ <listitem><para>dmesg -- View the kernel syslog buffer</para></listitem>
+ <listitem><para>go -- Continue the system</para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem>
+ <para>When you are done using kdb you need to consider rebooting the
+ system or using the "go" command to resuming normal kernel
+ execution. If you have paused the kernel for a lengthy period of
+ time, applications that rely on timely networking or anything to do
+ with real wall clock time could be adversely affected, so you
+ should take this into consideration when using the kernel
+ debugger.</para>
+ </listitem>
+ </orderedlist></para>
+ </sect1>
+ <sect1 id="quickKDBkeyboard">
+ <title>Quick start for kdb using a keyboard connected console</title>
+ <para>This is a quick example of how to use kdb with a keyboard.</para>
+ <para><orderedlist>
+ <listitem><para>Boot kernel with arguments:
+ <itemizedlist>
+ <listitem><para><constant>kgdboc=kbd</constant></para></listitem>
+ </itemizedlist></para>
+ <para>OR</para>
+ <para>Configure kgdboc after the kernel booted:
+ <itemizedlist>
+ <listitem><para><constant>echo kbd &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem><para>Enter the kernel debugger manually or by waiting for an oops or fault. There are several ways you can enter the kernel debugger manually; all involve using the sysrq-g, which means you must have enabled CONFIG_MAGIC_SYSRQ=y in your kernel config.</para>
+ <itemizedlist>
+ <listitem><para>When logged in as root or with a super user session you can run:</para>
+ <para><constant>echo g &gt; /proc/sysrq-trigger</constant></para></listitem>
+ <listitem><para>Example using a laptop keyboard</para>
+ <para>Press and hold down: <constant>Alt</constant></para>
+ <para>Press and hold down: <constant>Fn</constant></para>
+ <para>Press and release the key with the label: <constant>SysRq</constant></para>
+ <para>Release: <constant>Fn</constant></para>
+ <para>Press and release: <constant>g</constant></para>
+ <para>Release: <constant>Alt</constant></para>
+ </listitem>
+ <listitem><para>Example using a PS/2 101-key keyboard</para>
+ <para>Press and hold down: <constant>Alt</constant></para>
+ <para>Press and release the key with the label: <constant>SysRq</constant></para>
+ <para>Press and release: <constant>g</constant></para>
+ <para>Release: <constant>Alt</constant></para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+ <listitem>
+ <para>Now type in a kdb command such as "help", "dmesg", "bt" or "go" to continue kernel execution.</para>
+ </listitem>
+ </orderedlist></para>
+ </sect1>
+ </chapter>
+ <chapter id="EnableKGDB">
+ <title>Using kgdb / gdb</title>
+ <para>In order to use kgdb you must activate it by passing
+ configuration information to one of the kgdb I/O drivers. If you
+ do not pass any configuration information kgdb will not do anything
+ at all. Kgdb will only actively hook up to the kernel trap hooks
+ if a kgdb I/O driver is loaded and configured. If you unconfigure
+ a kgdb I/O driver, kgdb will unregister all the kernel hook points.
+ </para>
+ <para> All kgdb I/O drivers can be reconfigured at run time, if
+ <symbol>CONFIG_SYSFS</symbol> and <symbol>CONFIG_MODULES</symbol>
+ are enabled, by echo'ing a new config string to
+ <constant>/sys/module/&lt;driver&gt;/parameter/&lt;option&gt;</constant>.
+ The driver can be unconfigured by passing an empty string. You cannot
+ change the configuration while the debugger is attached. Make sure
+ to detach the debugger with the <constant>detach</constant> command
+ prior to trying to unconfigure a kgdb I/O driver.
+ </para>
+ <sect1 id="ConnectingGDB">
+ <title>Connecting with gdb to a serial port</title>
+ <orderedlist>
+ <listitem><para>Configure kgdboc</para>
+ <para>Boot kernel with arguments:
+ <itemizedlist>
+ <listitem><para><constant>kgdboc=ttyS0,115200</constant></para></listitem>
+ </itemizedlist></para>
+ <para>OR</para>
+ <para>Configure kgdboc after the kernel booted:
+ <itemizedlist>
+ <listitem><para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ </itemizedlist></para>
+ </listitem>
+ <listitem>
+ <para>Stop kernel execution (break into the debugger)</para>
+ <para>In order to connect to gdb via kgdboc, the kernel must
+ first be stopped. There are several ways to stop the kernel which
+ include using kgdbwait as a boot argument, via a sysrq-g, or running
+ the kernel until it takes an exception where it waits for the
+ debugger to attach.
+ <itemizedlist>
+ <listitem><para>When logged in as root or with a super user session you can run:</para>
+ <para><constant>echo g &gt; /proc/sysrq-trigger</constant></para></listitem>
+ <listitem><para>Example using minicom 2.2</para>
+ <para>Press: <constant>Control-a</constant></para>
+ <para>Press: <constant>f</constant></para>
+ <para>Press: <constant>g</constant></para>
+ </listitem>
+ <listitem><para>When you have telneted to a terminal server that supports sending a remote break</para>
+ <para>Press: <constant>Control-]</constant></para>
+ <para>Type in:<constant>send break</constant></para>
+ <para>Press: <constant>Enter</constant></para>
+ <para>Press: <constant>g</constant></para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem>
+ <para>Connect from from gdb</para>
+ <para>
+ Example (using a directly connected port):
+ </para>
+ <programlisting>
+ % gdb ./vmlinux
+ (gdb) set remotebaud 115200
+ (gdb) target remote /dev/ttyS0
+ </programlisting>
+ <para>
+ Example (kgdb to a terminal server on TCP port 2012):
+ </para>
+ <programlisting>
+ % gdb ./vmlinux
+ (gdb) target remote 192.168.2.2:2012
+ </programlisting>
+ <para>
+ Once connected, you can debug a kernel the way you would debug an
+ application program.
+ </para>
+ <para>
+ If you are having problems connecting or something is going
+ seriously wrong while debugging, it will most often be the case
+ that you want to enable gdb to be verbose about its target
+ communications. You do this prior to issuing the <constant>target
+ remote</constant> command by typing in: <constant>set debug remote 1</constant>
+ </para>
+ </listitem>
+ </orderedlist>
+ <para>Remember if you continue in gdb, and need to "break in" again,
+ you need to issue an other sysrq-g. It is easy to create a simple
+ entry point by putting a breakpoint at <constant>sys_sync</constant>
+ and then you can run "sync" from a shell or script to break into the
+ debugger.</para>
+ </sect1>
+ </chapter>
+ <chapter id="switchKdbKgdb">
+ <title>kgdb and kdb interoperability</title>
+ <para>It is possible to transition between kdb and kgdb dynamically.
+ The debug core will remember which you used the last time and
+ automatically start in the same mode.</para>
+ <sect1>
+ <title>Switching between kdb and kgdb</title>
+ <sect2>
+ <title>Switching from kgdb to kdb</title>
+ <para>
+ There are two ways to switch from kgdb to kdb: you can use gdb to
+ issue a maintenance packet, or you can blindly type the command $3#33.
+ Whenever kernel debugger stops in kgdb mode it will print the
+ message <constant>KGDB or $3#33 for KDB</constant>. It is important
+ to note that you have to type the sequence correctly in one pass.
+ You cannot type a backspace or delete because kgdb will interpret
+ that as part of the debug stream.
+ <orderedlist>
+ <listitem><para>Change from kgdb to kdb by blindly typing:</para>
+ <para><constant>$3#33</constant></para></listitem>
+ <listitem><para>Change from kgdb to kdb with gdb</para>
+ <para><constant>maintenance packet 3</constant></para>
+ <para>NOTE: Now you must kill gdb. Typically you press control-z and
+ issue the command: kill -9 %</para></listitem>
+ </orderedlist>
+ </para>
+ </sect2>
+ <sect2>
+ <title>Change from kdb to kgdb</title>
+ <para>There are two ways you can change from kdb to kgdb. You can
+ manually enter kgdb mode by issuing the kgdb command from the kdb
+ shell prompt, or you can connect gdb while the kdb shell prompt is
+ active. The kdb shell looks for the typical first commands that gdb
+ would issue with the gdb remote protocol and if it sees one of those
+ commands it automatically changes into kgdb mode.</para>
+ <orderedlist>
+ <listitem><para>From kdb issue the command:</para>
+ <para><constant>kgdb</constant></para>
+ <para>Now disconnect your terminal program and connect gdb in its place</para></listitem>
+ <listitem><para>At the kdb prompt, disconnect the terminal program and connect gdb in its place.</para></listitem>
+ </orderedlist>
+ </sect2>
+ </sect1>
+ <sect1>
+ <title>Running kdb commands from gdb</title>
+ <para>It is possible to run a limited set of kdb commands from gdb,
+ using the gdb monitor command. You don't want to execute any of the
+ run control or breakpoint operations, because it can disrupt the
+ state of the kernel debugger. You should be using gdb for
+ breakpoints and run control operations if you have gdb connected.
+ The more useful commands to run are things like lsmod, dmesg, ps or
+ possibly some of the memory information commands. To see all the kdb
+ commands you can run <constant>monitor help</constant>.</para>
+ <para>Example:
+ <informalexample><programlisting>
+(gdb) monitor ps
+1 idle process (state I) and
+27 sleeping system daemon (state M) processes suppressed,
+use 'ps A' to see all.
+Task Addr Pid Parent [*] cpu State Thread Command
+
+0xc78291d0 1 0 0 0 S 0xc7829404 init
+0xc7954150 942 1 0 0 S 0xc7954384 dropbear
+0xc78789c0 944 1 0 0 S 0xc7878bf4 sh
+(gdb)
+ </programlisting></informalexample>
+ </para>
+ </sect1>
+ </chapter>
+ <chapter id="KGDBTestSuite">
+ <title>kgdb Test Suite</title>
+ <para>
+ When kgdb is enabled in the kernel config you can also elect to
+ enable the config parameter KGDB_TESTS. Turning this on will
+ enable a special kgdb I/O module which is designed to test the
+ kgdb internal functions.
+ </para>
+ <para>
+ The kgdb tests are mainly intended for developers to test the kgdb
+ internals as well as a tool for developing a new kgdb architecture
+ specific implementation. These tests are not really for end users
+ of the Linux kernel. The primary source of documentation would be
+ to look in the drivers/misc/kgdbts.c file.
+ </para>
+ <para>
+ The kgdb test suite can also be configured at compile time to run
+ the core set of tests by setting the kernel config parameter
+ KGDB_TESTS_ON_BOOT. This particular option is aimed at automated
+ regression testing and does not require modifying the kernel boot
+ config arguments. If this is turned on, the kgdb test suite can
+ be disabled by specifying "kgdbts=" as a kernel boot argument.
+ </para>
+ </chapter>
+ <chapter id="CommonBackEndReq">
+ <title>Kernel Debugger Internals</title>
+ <sect1 id="kgdbArchitecture">
+ <title>Architecture Specifics</title>
+ <para>
+ The kernel debugger is organized into a number of components:
+ <orderedlist>
+ <listitem><para>The debug core</para>
+ <para>
+ The debug core is found in kernel/debugger/debug_core.c. It contains:
+ <itemizedlist>
+ <listitem><para>A generic OS exception handler which includes
+ sync'ing the processors into a stopped state on an multi-CPU
+ system.</para></listitem>
+ <listitem><para>The API to talk to the kgdb I/O drivers</para></listitem>
+ <listitem><para>The API to make calls to the arch-specific kgdb implementation</para></listitem>
+ <listitem><para>The logic to perform safe memory reads and writes to memory while using the debugger</para></listitem>
+ <listitem><para>A full implementation for software breakpoints unless overridden by the arch</para></listitem>
+ <listitem><para>The API to invoke either the kdb or kgdb frontend to the debug core.</para></listitem>
+ <listitem><para>The structures and callback API for atomic kernel mode setting.</para>
+ <para>NOTE: kgdboc is where the kms callbacks are invoked.</para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem><para>kgdb arch-specific implementation</para>
+ <para>
+ This implementation is generally found in arch/*/kernel/kgdb.c.
+ As an example, arch/x86/kernel/kgdb.c contains the specifics to
+ implement HW breakpoint as well as the initialization to
+ dynamically register and unregister for the trap handlers on
+ this architecture. The arch-specific portion implements:
+ <itemizedlist>
+ <listitem><para>contains an arch-specific trap catcher which
+ invokes kgdb_handle_exception() to start kgdb about doing its
+ work</para></listitem>
+ <listitem><para>translation to and from gdb specific packet format to pt_regs</para></listitem>
+ <listitem><para>Registration and unregistration of architecture specific trap hooks</para></listitem>
+ <listitem><para>Any special exception handling and cleanup</para></listitem>
+ <listitem><para>NMI exception handling and cleanup</para></listitem>
+ <listitem><para>(optional)HW breakpoints</para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem><para>gdbstub frontend (aka kgdb)</para>
+ <para>The gdbstub is located in kernel/debug/gdbstub.c. It contains:</para>
+ <itemizedlist>
+ <listitem><para>All the logic to implement the gdb serial protocol</para></listitem>
+ </itemizedlist>
+ </listitem>
+ <listitem><para>kdb frontend</para>
+ <para>The kdb debugger shell is broken down into a number of
+ components. The kdb core is located in kernel/debug/kdb. There
+ are a number of helper functions in some of the other kernel
+ components to make it possible for kdb to examine and report
+ information about the kernel without taking locks that could
+ cause a kernel deadlock. The kdb core contains implements the following functionality.</para>
+ <itemizedlist>
+ <listitem><para>A simple shell</para></listitem>
+ <listitem><para>The kdb core command set</para></listitem>
+ <listitem><para>A registration API to register additional kdb shell commands.</para>
+ <itemizedlist>
+ <listitem><para>A good example of a self-contained kdb module
+ is the "ftdump" command for dumping the ftrace buffer. See:
+ kernel/trace/trace_kdb.c</para></listitem>
+ <listitem><para>For an example of how to dynamically register
+ a new kdb command you can build the kdb_hello.ko kernel module
+ from samples/kdb/kdb_hello.c. To build this example you can
+ set CONFIG_SAMPLES=y and CONFIG_SAMPLE_KDB=m in your kernel
+ config. Later run "modprobe kdb_hello" and the next time you
+ enter the kdb shell, you can run the "hello"
+ command.</para></listitem>
+ </itemizedlist></listitem>
+ <listitem><para>The implementation for kdb_printf() which
+ emits messages directly to I/O drivers, bypassing the kernel
+ log.</para></listitem>
+ <listitem><para>SW / HW breakpoint management for the kdb shell</para></listitem>
+ </itemizedlist>
+ </listitem>
+ <listitem><para>kgdb I/O driver</para>
+ <para>
+ Each kgdb I/O driver has to provide an implementation for the following:
+ <itemizedlist>
+ <listitem><para>configuration via built-in or module</para></listitem>
+ <listitem><para>dynamic configuration and kgdb hook registration calls</para></listitem>
+ <listitem><para>read and write character interface</para></listitem>
+ <listitem><para>A cleanup handler for unconfiguring from the kgdb core</para></listitem>
+ <listitem><para>(optional) Early debug methodology</para></listitem>
+ </itemizedlist>
+ Any given kgdb I/O driver has to operate very closely with the
+ hardware and must do it in such a way that does not enable
+ interrupts or change other parts of the system context without
+ completely restoring them. The kgdb core will repeatedly "poll"
+ a kgdb I/O driver for characters when it needs input. The I/O
+ driver is expected to return immediately if there is no data
+ available. Doing so allows for the future possibility to touch
+ watch dog hardware in such a way as to have a target system not
+ reset when these are enabled.
+ </para>
+ </listitem>
+ </orderedlist>
+ </para>
+ <para>
+ If you are intent on adding kgdb architecture specific support
+ for a new architecture, the architecture should define
+ <constant>HAVE_ARCH_KGDB</constant> in the architecture specific
+ Kconfig file. This will enable kgdb for the architecture, and
+ at that point you must create an architecture specific kgdb
+ implementation.
+ </para>
+ <para>
+ There are a few flags which must be set on every architecture in
+ their &lt;asm/kgdb.h&gt; file. These are:
+ <itemizedlist>
+ <listitem>
+ <para>
+ NUMREGBYTES: The size in bytes of all of the registers, so
+ that we can ensure they will all fit into a packet.
+ </para>
+ <para>
+ BUFMAX: The size in bytes of the buffer GDB will read into.
+ This must be larger than NUMREGBYTES.
+ </para>
+ <para>
+ CACHE_FLUSH_IS_SAFE: Set to 1 if it is always safe to call
+ flush_cache_range or flush_icache_range. On some architectures,
+ these functions may not be safe to call on SMP since we keep other
+ CPUs in a holding pattern.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ There are also the following functions for the common backend,
+ found in kernel/kgdb.c, that must be supplied by the
+ architecture-specific backend unless marked as (optional), in
+ which case a default function maybe used if the architecture
+ does not need to provide a specific implementation.
+ </para>
+!Iinclude/linux/kgdb.h
+ </sect1>
+ <sect1 id="kgdbocDesign">
+ <title>kgdboc internals</title>
+ <sect2>
+ <title>kgdboc and uarts</title>
+ <para>
+ The kgdboc driver is actually a very thin driver that relies on the
+ underlying low level to the hardware driver having "polling hooks"
+ which the to which the tty driver is attached. In the initial
+ implementation of kgdboc it the serial_core was changed to expose a
+ low level UART hook for doing polled mode reading and writing of a
+ single character while in an atomic context. When kgdb makes an I/O
+ request to the debugger, kgdboc invokes a callback in the serial
+ core which in turn uses the callback in the UART driver.</para>
+ <para>
+ When using kgdboc with a UART, the UART driver must implement two callbacks in the <constant>struct uart_ops</constant>. Example from drivers/8250.c:<programlisting>
+#ifdef CONFIG_CONSOLE_POLL
+ .poll_get_char = serial8250_get_poll_char,
+ .poll_put_char = serial8250_put_poll_char,
+#endif
+ </programlisting>
+ Any implementation specifics around creating a polling driver use the
+ <constant>#ifdef CONFIG_CONSOLE_POLL</constant>, as shown above.
+ Keep in mind that polling hooks have to be implemented in such a way
+ that they can be called from an atomic context and have to restore
+ the state of the UART chip on return such that the system can return
+ to normal when the debugger detaches. You need to be very careful
+ with any kind of lock you consider, because failing here is most likely
+ going to mean pressing the reset button.
+ </para>
+ </sect2>
+ <sect2 id="kgdbocKbd">
+ <title>kgdboc and keyboards</title>
+ <para>The kgdboc driver contains logic to configure communications
+ with an attached keyboard. The keyboard infrastructure is only
+ compiled into the kernel when CONFIG_KDB_KEYBOARD=y is set in the
+ kernel configuration.</para>
+ <para>The core polled keyboard driver driver for PS/2 type keyboards
+ is in drivers/char/kdb_keyboard.c. This driver is hooked into the
+ debug core when kgdboc populates the callback in the array
+ called <constant>kdb_poll_funcs[]</constant>. The
+ kdb_get_kbd_char() is the top-level function which polls hardware
+ for single character input.
+ </para>
+ </sect2>
+ <sect2 id="kgdbocKms">
+ <title>kgdboc and kms</title>
+ <para>The kgdboc driver contains logic to request the graphics
+ display to switch to a text context when you are using
+ "kgdboc=kms,kbd", provided that you have a video driver which has a
+ frame buffer console and atomic kernel mode setting support.</para>
+ <para>
+ Every time the kernel
+ debugger is entered it calls kgdboc_pre_exp_handler() which in turn
+ calls con_debug_enter() in the virtual console layer. On resuming kernel
+ execution, the kernel debugger calls kgdboc_post_exp_handler() which
+ in turn calls con_debug_leave().</para>
+ <para>Any video driver that wants to be compatible with the kernel
+ debugger and the atomic kms callbacks must implement the
+ mode_set_base_atomic, fb_debug_enter and fb_debug_leave operations.
+ For the fb_debug_enter and fb_debug_leave the option exists to use
+ the generic drm fb helper functions or implement something custom for
+ the hardware. The following example shows the initialization of the
+ .mode_set_base_atomic operation in
+ drivers/gpu/drm/i915/intel_display.c:
+ <informalexample>
+ <programlisting>
+static const struct drm_crtc_helper_funcs intel_helper_funcs = {
+[...]
+ .mode_set_base_atomic = intel_pipe_set_base_atomic,
+[...]
+};
+ </programlisting>
+ </informalexample>
+ </para>
+ <para>Here is an example of how the i915 driver initializes the fb_debug_enter and fb_debug_leave functions to use the generic drm helpers in
+ drivers/gpu/drm/i915/intel_fb.c:
+ <informalexample>
+ <programlisting>
+static struct fb_ops intelfb_ops = {
+[...]
+ .fb_debug_enter = drm_fb_helper_debug_enter,
+ .fb_debug_leave = drm_fb_helper_debug_leave,
+[...]
+};
+ </programlisting>
+ </informalexample>
+ </para>
+ </sect2>
+ </sect1>
+ </chapter>
+ <chapter id="credits">
+ <title>Credits</title>
+ <para>
+ The following people have contributed to this document:
+ <orderedlist>
+ <listitem><para>Amit Kale<email>amitkale@linsyssoft.com</email></para></listitem>
+ <listitem><para>Tom Rini<email>trini@kernel.crashing.org</email></para></listitem>
+ </orderedlist>
+ In March 2008 this document was completely rewritten by:
+ <itemizedlist>
+ <listitem><para>Jason Wessel<email>jason.wessel@windriver.com</email></para></listitem>
+ </itemizedlist>
+ In Jan 2010 this document was updated to include kdb.
+ <itemizedlist>
+ <listitem><para>Jason Wessel<email>jason.wessel@windriver.com</email></para></listitem>
+ </itemizedlist>
+ </para>
+ </chapter>
+</book>
+
diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl
new file mode 100644
index 00000000..deb71bae
--- /dev/null
+++ b/Documentation/DocBook/libata.tmpl
@@ -0,0 +1,1625 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="libataDevGuide">
+ <bookinfo>
+ <title>libATA Developer's Guide</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Jeff</firstname>
+ <surname>Garzik</surname>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2003-2006</year>
+ <holder>Jeff Garzik</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ The contents of this file are subject to the Open
+ Software License version 1.1 that can be found at
+ <ulink url="http://fedoraproject.org/wiki/Licensing:OSL1.1">http://fedoraproject.org/wiki/Licensing:OSL1.1</ulink>
+ and is included herein by reference.
+ </para>
+
+ <para>
+ Alternatively, the contents of this file may be used under the terms
+ of the GNU General Public License version 2 (the "GPL") as distributed
+ in the kernel source COPYING file, in which case the provisions of
+ the GPL are applicable instead of the above. If you wish to allow
+ the use of your version of this file only under the terms of the
+ GPL and not to allow others to use your version of this file under
+ the OSL, indicate your decision by deleting the provisions above and
+ replace them with the notice and other provisions required by the GPL.
+ If you do not delete the provisions above, a recipient may use your
+ version of this file under either the OSL or the GPL.
+ </para>
+
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="libataIntroduction">
+ <title>Introduction</title>
+ <para>
+ libATA is a library used inside the Linux kernel to support ATA host
+ controllers and devices. libATA provides an ATA driver API, class
+ transports for ATA and ATAPI devices, and SCSI&lt;-&gt;ATA translation
+ for ATA devices according to the T10 SAT specification.
+ </para>
+ <para>
+ This Guide documents the libATA driver API, library functions, library
+ internals, and a couple sample ATA low-level drivers.
+ </para>
+ </chapter>
+
+ <chapter id="libataDriverApi">
+ <title>libata Driver API</title>
+ <para>
+ struct ata_port_operations is defined for every low-level libata
+ hardware driver, and it controls how the low-level driver
+ interfaces with the ATA and SCSI layers.
+ </para>
+ <para>
+ FIS-based drivers will hook into the system with ->qc_prep() and
+ ->qc_issue() high-level hooks. Hardware which behaves in a manner
+ similar to PCI IDE hardware may utilize several generic helpers,
+ defining at a bare minimum the bus I/O addresses of the ATA shadow
+ register blocks.
+ </para>
+ <sect1>
+ <title>struct ata_port_operations</title>
+
+ <sect2><title>Disable ATA port</title>
+ <programlisting>
+void (*port_disable) (struct ata_port *);
+ </programlisting>
+
+ <para>
+ Called from ata_bus_probe() error path, as well as when
+ unregistering from the SCSI module (rmmod, hot unplug).
+ This function should do whatever needs to be done to take the
+ port out of use. In most cases, ata_port_disable() can be used
+ as this hook.
+ </para>
+ <para>
+ Called from ata_bus_probe() on a failed probe.
+ Called from ata_scsi_release().
+ </para>
+
+ </sect2>
+
+ <sect2><title>Post-IDENTIFY device configuration</title>
+ <programlisting>
+void (*dev_config) (struct ata_port *, struct ata_device *);
+ </programlisting>
+
+ <para>
+ Called after IDENTIFY [PACKET] DEVICE is issued to each device
+ found. Typically used to apply device-specific fixups prior to
+ issue of SET FEATURES - XFER MODE, and prior to operation.
+ </para>
+ <para>
+ This entry may be specified as NULL in ata_port_operations.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Set PIO/DMA mode</title>
+ <programlisting>
+void (*set_piomode) (struct ata_port *, struct ata_device *);
+void (*set_dmamode) (struct ata_port *, struct ata_device *);
+void (*post_set_mode) (struct ata_port *);
+unsigned int (*mode_filter) (struct ata_port *, struct ata_device *, unsigned int);
+ </programlisting>
+
+ <para>
+ Hooks called prior to the issue of SET FEATURES - XFER MODE
+ command. The optional ->mode_filter() hook is called when libata
+ has built a mask of the possible modes. This is passed to the
+ ->mode_filter() function which should return a mask of valid modes
+ after filtering those unsuitable due to hardware limits. It is not
+ valid to use this interface to add modes.
+ </para>
+ <para>
+ dev->pio_mode and dev->dma_mode are guaranteed to be valid when
+ ->set_piomode() and when ->set_dmamode() is called. The timings for
+ any other drive sharing the cable will also be valid at this point.
+ That is the library records the decisions for the modes of each
+ drive on a channel before it attempts to set any of them.
+ </para>
+ <para>
+ ->post_set_mode() is
+ called unconditionally, after the SET FEATURES - XFER MODE
+ command completes successfully.
+ </para>
+
+ <para>
+ ->set_piomode() is always called (if present), but
+ ->set_dma_mode() is only called if DMA is possible.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Taskfile read/write</title>
+ <programlisting>
+void (*sff_tf_load) (struct ata_port *ap, struct ata_taskfile *tf);
+void (*sff_tf_read) (struct ata_port *ap, struct ata_taskfile *tf);
+ </programlisting>
+
+ <para>
+ ->tf_load() is called to load the given taskfile into hardware
+ registers / DMA buffers. ->tf_read() is called to read the
+ hardware registers / DMA buffers, to obtain the current set of
+ taskfile register values.
+ Most drivers for taskfile-based hardware (PIO or MMIO) use
+ ata_sff_tf_load() and ata_sff_tf_read() for these hooks.
+ </para>
+
+ </sect2>
+
+ <sect2><title>PIO data read/write</title>
+ <programlisting>
+void (*sff_data_xfer) (struct ata_device *, unsigned char *, unsigned int, int);
+ </programlisting>
+
+ <para>
+All bmdma-style drivers must implement this hook. This is the low-level
+operation that actually copies the data bytes during a PIO data
+transfer.
+Typically the driver will choose one of ata_sff_data_xfer_noirq(),
+ata_sff_data_xfer(), or ata_sff_data_xfer32().
+ </para>
+
+ </sect2>
+
+ <sect2><title>ATA command execute</title>
+ <programlisting>
+void (*sff_exec_command)(struct ata_port *ap, struct ata_taskfile *tf);
+ </programlisting>
+
+ <para>
+ causes an ATA command, previously loaded with
+ ->tf_load(), to be initiated in hardware.
+ Most drivers for taskfile-based hardware use ata_sff_exec_command()
+ for this hook.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Per-cmd ATAPI DMA capabilities filter</title>
+ <programlisting>
+int (*check_atapi_dma) (struct ata_queued_cmd *qc);
+ </programlisting>
+
+ <para>
+Allow low-level driver to filter ATA PACKET commands, returning a status
+indicating whether or not it is OK to use DMA for the supplied PACKET
+command.
+ </para>
+ <para>
+ This hook may be specified as NULL, in which case libata will
+ assume that atapi dma can be supported.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Read specific ATA shadow registers</title>
+ <programlisting>
+u8 (*sff_check_status)(struct ata_port *ap);
+u8 (*sff_check_altstatus)(struct ata_port *ap);
+ </programlisting>
+
+ <para>
+ Reads the Status/AltStatus ATA shadow register from
+ hardware. On some hardware, reading the Status register has
+ the side effect of clearing the interrupt condition.
+ Most drivers for taskfile-based hardware use
+ ata_sff_check_status() for this hook.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Write specific ATA shadow register</title>
+ <programlisting>
+void (*sff_set_devctl)(struct ata_port *ap, u8 ctl);
+ </programlisting>
+
+ <para>
+ Write the device control ATA shadow register to the hardware.
+ Most drivers don't need to define this.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Select ATA device on bus</title>
+ <programlisting>
+void (*sff_dev_select)(struct ata_port *ap, unsigned int device);
+ </programlisting>
+
+ <para>
+ Issues the low-level hardware command(s) that causes one of N
+ hardware devices to be considered 'selected' (active and
+ available for use) on the ATA bus. This generally has no
+ meaning on FIS-based devices.
+ </para>
+ <para>
+ Most drivers for taskfile-based hardware use
+ ata_sff_dev_select() for this hook.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Private tuning method</title>
+ <programlisting>
+void (*set_mode) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+ By default libata performs drive and controller tuning in
+ accordance with the ATA timing rules and also applies blacklists
+ and cable limits. Some controllers need special handling and have
+ custom tuning rules, typically raid controllers that use ATA
+ commands but do not actually do drive timing.
+ </para>
+
+ <warning>
+ <para>
+ This hook should not be used to replace the standard controller
+ tuning logic when a controller has quirks. Replacing the default
+ tuning logic in that case would bypass handling for drive and
+ bridge quirks that may be important to data reliability. If a
+ controller needs to filter the mode selection it should use the
+ mode_filter hook instead.
+ </para>
+ </warning>
+
+ </sect2>
+
+ <sect2><title>Control PCI IDE BMDMA engine</title>
+ <programlisting>
+void (*bmdma_setup) (struct ata_queued_cmd *qc);
+void (*bmdma_start) (struct ata_queued_cmd *qc);
+void (*bmdma_stop) (struct ata_port *ap);
+u8 (*bmdma_status) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+When setting up an IDE BMDMA transaction, these hooks arm
+(->bmdma_setup), fire (->bmdma_start), and halt (->bmdma_stop)
+the hardware's DMA engine. ->bmdma_status is used to read the standard
+PCI IDE DMA Status register.
+ </para>
+
+ <para>
+These hooks are typically either no-ops, or simply not implemented, in
+FIS-based drivers.
+ </para>
+ <para>
+Most legacy IDE drivers use ata_bmdma_setup() for the bmdma_setup()
+hook. ata_bmdma_setup() will write the pointer to the PRD table to
+the IDE PRD Table Address register, enable DMA in the DMA Command
+register, and call exec_command() to begin the transfer.
+ </para>
+ <para>
+Most legacy IDE drivers use ata_bmdma_start() for the bmdma_start()
+hook. ata_bmdma_start() will write the ATA_DMA_START flag to the DMA
+Command register.
+ </para>
+ <para>
+Many legacy IDE drivers use ata_bmdma_stop() for the bmdma_stop()
+hook. ata_bmdma_stop() clears the ATA_DMA_START flag in the DMA
+command register.
+ </para>
+ <para>
+Many legacy IDE drivers use ata_bmdma_status() as the bmdma_status() hook.
+ </para>
+
+ </sect2>
+
+ <sect2><title>High-level taskfile hooks</title>
+ <programlisting>
+void (*qc_prep) (struct ata_queued_cmd *qc);
+int (*qc_issue) (struct ata_queued_cmd *qc);
+ </programlisting>
+
+ <para>
+ Higher-level hooks, these two hooks can potentially supercede
+ several of the above taskfile/DMA engine hooks. ->qc_prep is
+ called after the buffers have been DMA-mapped, and is typically
+ used to populate the hardware's DMA scatter-gather table.
+ Most drivers use the standard ata_qc_prep() helper function, but
+ more advanced drivers roll their own.
+ </para>
+ <para>
+ ->qc_issue is used to make a command active, once the hardware
+ and S/G tables have been prepared. IDE BMDMA drivers use the
+ helper function ata_qc_issue_prot() for taskfile protocol-based
+ dispatch. More advanced drivers implement their own ->qc_issue.
+ </para>
+ <para>
+ ata_qc_issue_prot() calls ->tf_load(), ->bmdma_setup(), and
+ ->bmdma_start() as necessary to initiate a transfer.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Exception and probe handling (EH)</title>
+ <programlisting>
+void (*eng_timeout) (struct ata_port *ap);
+void (*phy_reset) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+Deprecated. Use ->error_handler() instead.
+ </para>
+
+ <programlisting>
+void (*freeze) (struct ata_port *ap);
+void (*thaw) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+ata_port_freeze() is called when HSM violations or some other
+condition disrupts normal operation of the port. A frozen port
+is not allowed to perform any operation until the port is
+thawed, which usually follows a successful reset.
+ </para>
+
+ <para>
+The optional ->freeze() callback can be used for freezing the port
+hardware-wise (e.g. mask interrupt and stop DMA engine). If a
+port cannot be frozen hardware-wise, the interrupt handler
+must ack and clear interrupts unconditionally while the port
+is frozen.
+ </para>
+ <para>
+The optional ->thaw() callback is called to perform the opposite of ->freeze():
+prepare the port for normal operation once again. Unmask interrupts,
+start DMA engine, etc.
+ </para>
+
+ <programlisting>
+void (*error_handler) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+->error_handler() is a driver's hook into probe, hotplug, and recovery
+and other exceptional conditions. The primary responsibility of an
+implementation is to call ata_do_eh() or ata_bmdma_drive_eh() with a set
+of EH hooks as arguments:
+ </para>
+
+ <para>
+'prereset' hook (may be NULL) is called during an EH reset, before any other actions
+are taken.
+ </para>
+
+ <para>
+'postreset' hook (may be NULL) is called after the EH reset is performed. Based on
+existing conditions, severity of the problem, and hardware capabilities,
+ </para>
+
+ <para>
+Either 'softreset' (may be NULL) or 'hardreset' (may be NULL) will be
+called to perform the low-level EH reset.
+ </para>
+
+ <programlisting>
+void (*post_internal_cmd) (struct ata_queued_cmd *qc);
+ </programlisting>
+
+ <para>
+Perform any hardware-specific actions necessary to finish processing
+after executing a probe-time or EH-time command via ata_exec_internal().
+ </para>
+
+ </sect2>
+
+ <sect2><title>Hardware interrupt handling</title>
+ <programlisting>
+irqreturn_t (*irq_handler)(int, void *, struct pt_regs *);
+void (*irq_clear) (struct ata_port *);
+ </programlisting>
+
+ <para>
+ ->irq_handler is the interrupt handling routine registered with
+ the system, by libata. ->irq_clear is called during probe just
+ before the interrupt handler is registered, to be sure hardware
+ is quiet.
+ </para>
+ <para>
+ The second argument, dev_instance, should be cast to a pointer
+ to struct ata_host_set.
+ </para>
+ <para>
+ Most legacy IDE drivers use ata_sff_interrupt() for the
+ irq_handler hook, which scans all ports in the host_set,
+ determines which queued command was active (if any), and calls
+ ata_sff_host_intr(ap,qc).
+ </para>
+ <para>
+ Most legacy IDE drivers use ata_sff_irq_clear() for the
+ irq_clear() hook, which simply clears the interrupt and error
+ flags in the DMA status register.
+ </para>
+
+ </sect2>
+
+ <sect2><title>SATA phy read/write</title>
+ <programlisting>
+int (*scr_read) (struct ata_port *ap, unsigned int sc_reg,
+ u32 *val);
+int (*scr_write) (struct ata_port *ap, unsigned int sc_reg,
+ u32 val);
+ </programlisting>
+
+ <para>
+ Read and write standard SATA phy registers. Currently only used
+ if ->phy_reset hook called the sata_phy_reset() helper function.
+ sc_reg is one of SCR_STATUS, SCR_CONTROL, SCR_ERROR, or SCR_ACTIVE.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Init and shutdown</title>
+ <programlisting>
+int (*port_start) (struct ata_port *ap);
+void (*port_stop) (struct ata_port *ap);
+void (*host_stop) (struct ata_host_set *host_set);
+ </programlisting>
+
+ <para>
+ ->port_start() is called just after the data structures for each
+ port are initialized. Typically this is used to alloc per-port
+ DMA buffers / tables / rings, enable DMA engines, and similar
+ tasks. Some drivers also use this entry point as a chance to
+ allocate driver-private memory for ap->private_data.
+ </para>
+ <para>
+ Many drivers use ata_port_start() as this hook or call
+ it from their own port_start() hooks. ata_port_start()
+ allocates space for a legacy IDE PRD table and returns.
+ </para>
+ <para>
+ ->port_stop() is called after ->host_stop(). Its sole function
+ is to release DMA/memory resources, now that they are no longer
+ actively being used. Many drivers also free driver-private
+ data from port at this time.
+ </para>
+ <para>
+ ->host_stop() is called after all ->port_stop() calls
+have completed. The hook must finalize hardware shutdown, release DMA
+and other resources, etc.
+ This hook may be specified as NULL, in which case it is not called.
+ </para>
+
+ </sect2>
+
+ </sect1>
+ </chapter>
+
+ <chapter id="libataEH">
+ <title>Error handling</title>
+
+ <para>
+ This chapter describes how errors are handled under libata.
+ Readers are advised to read SCSI EH
+ (Documentation/scsi/scsi_eh.txt) and ATA exceptions doc first.
+ </para>
+
+ <sect1><title>Origins of commands</title>
+ <para>
+ In libata, a command is represented with struct ata_queued_cmd
+ or qc. qc's are preallocated during port initialization and
+ repetitively used for command executions. Currently only one
+ qc is allocated per port but yet-to-be-merged NCQ branch
+ allocates one for each tag and maps each qc to NCQ tag 1-to-1.
+ </para>
+ <para>
+ libata commands can originate from two sources - libata itself
+ and SCSI midlayer. libata internal commands are used for
+ initialization and error handling. All normal blk requests
+ and commands for SCSI emulation are passed as SCSI commands
+ through queuecommand callback of SCSI host template.
+ </para>
+ </sect1>
+
+ <sect1><title>How commands are issued</title>
+
+ <variablelist>
+
+ <varlistentry><term>Internal commands</term>
+ <listitem>
+ <para>
+ First, qc is allocated and initialized using
+ ata_qc_new_init(). Although ata_qc_new_init() doesn't
+ implement any wait or retry mechanism when qc is not
+ available, internal commands are currently issued only during
+ initialization and error recovery, so no other command is
+ active and allocation is guaranteed to succeed.
+ </para>
+ <para>
+ Once allocated qc's taskfile is initialized for the command to
+ be executed. qc currently has two mechanisms to notify
+ completion. One is via qc->complete_fn() callback and the
+ other is completion qc->waiting. qc->complete_fn() callback
+ is the asynchronous path used by normal SCSI translated
+ commands and qc->waiting is the synchronous (issuer sleeps in
+ process context) path used by internal commands.
+ </para>
+ <para>
+ Once initialization is complete, host_set lock is acquired
+ and the qc is issued.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>SCSI commands</term>
+ <listitem>
+ <para>
+ All libata drivers use ata_scsi_queuecmd() as
+ hostt->queuecommand callback. scmds can either be simulated
+ or translated. No qc is involved in processing a simulated
+ scmd. The result is computed right away and the scmd is
+ completed.
+ </para>
+ <para>
+ For a translated scmd, ata_qc_new_init() is invoked to
+ allocate a qc and the scmd is translated into the qc. SCSI
+ midlayer's completion notification function pointer is stored
+ into qc->scsidone.
+ </para>
+ <para>
+ qc->complete_fn() callback is used for completion
+ notification. ATA commands use ata_scsi_qc_complete() while
+ ATAPI commands use atapi_qc_complete(). Both functions end up
+ calling qc->scsidone to notify upper layer when the qc is
+ finished. After translation is completed, the qc is issued
+ with ata_qc_issue().
+ </para>
+ <para>
+ Note that SCSI midlayer invokes hostt->queuecommand while
+ holding host_set lock, so all above occur while holding
+ host_set lock.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </sect1>
+
+ <sect1><title>How commands are processed</title>
+ <para>
+ Depending on which protocol and which controller are used,
+ commands are processed differently. For the purpose of
+ discussion, a controller which uses taskfile interface and all
+ standard callbacks is assumed.
+ </para>
+ <para>
+ Currently 6 ATA command protocols are used. They can be
+ sorted into the following four categories according to how
+ they are processed.
+ </para>
+
+ <variablelist>
+ <varlistentry><term>ATA NO DATA or DMA</term>
+ <listitem>
+ <para>
+ ATA_PROT_NODATA and ATA_PROT_DMA fall into this category.
+ These types of commands don't require any software
+ intervention once issued. Device will raise interrupt on
+ completion.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>ATA PIO</term>
+ <listitem>
+ <para>
+ ATA_PROT_PIO is in this category. libata currently
+ implements PIO with polling. ATA_NIEN bit is set to turn
+ off interrupt and pio_task on ata_wq performs polling and
+ IO.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>ATAPI NODATA or DMA</term>
+ <listitem>
+ <para>
+ ATA_PROT_ATAPI_NODATA and ATA_PROT_ATAPI_DMA are in this
+ category. packet_task is used to poll BSY bit after
+ issuing PACKET command. Once BSY is turned off by the
+ device, packet_task transfers CDB and hands off processing
+ to interrupt handler.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>ATAPI PIO</term>
+ <listitem>
+ <para>
+ ATA_PROT_ATAPI is in this category. ATA_NIEN bit is set
+ and, as in ATAPI NODATA or DMA, packet_task submits cdb.
+ However, after submitting cdb, further processing (data
+ transfer) is handed off to pio_task.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </sect1>
+
+ <sect1><title>How commands are completed</title>
+ <para>
+ Once issued, all qc's are either completed with
+ ata_qc_complete() or time out. For commands which are handled
+ by interrupts, ata_host_intr() invokes ata_qc_complete(), and,
+ for PIO tasks, pio_task invokes ata_qc_complete(). In error
+ cases, packet_task may also complete commands.
+ </para>
+ <para>
+ ata_qc_complete() does the following.
+ </para>
+
+ <orderedlist>
+
+ <listitem>
+ <para>
+ DMA memory is unmapped.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ ATA_QCFLAG_ACTIVE is clared from qc->flags.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ qc->complete_fn() callback is invoked. If the return value of
+ the callback is not zero. Completion is short circuited and
+ ata_qc_complete() returns.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ __ata_qc_complete() is called, which does
+ <orderedlist>
+
+ <listitem>
+ <para>
+ qc->flags is cleared to zero.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ ap->active_tag and qc->tag are poisoned.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ qc->waiting is claread &amp; completed (in that order).
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ qc is deallocated by clearing appropriate bit in ap->qactive.
+ </para>
+ </listitem>
+
+ </orderedlist>
+ </para>
+ </listitem>
+
+ </orderedlist>
+
+ <para>
+ So, it basically notifies upper layer and deallocates qc. One
+ exception is short-circuit path in #3 which is used by
+ atapi_qc_complete().
+ </para>
+ <para>
+ For all non-ATAPI commands, whether it fails or not, almost
+ the same code path is taken and very little error handling
+ takes place. A qc is completed with success status if it
+ succeeded, with failed status otherwise.
+ </para>
+ <para>
+ However, failed ATAPI commands require more handling as
+ REQUEST SENSE is needed to acquire sense data. If an ATAPI
+ command fails, ata_qc_complete() is invoked with error status,
+ which in turn invokes atapi_qc_complete() via
+ qc->complete_fn() callback.
+ </para>
+ <para>
+ This makes atapi_qc_complete() set scmd->result to
+ SAM_STAT_CHECK_CONDITION, complete the scmd and return 1. As
+ the sense data is empty but scmd->result is CHECK CONDITION,
+ SCSI midlayer will invoke EH for the scmd, and returning 1
+ makes ata_qc_complete() to return without deallocating the qc.
+ This leads us to ata_scsi_error() with partially completed qc.
+ </para>
+
+ </sect1>
+
+ <sect1><title>ata_scsi_error()</title>
+ <para>
+ ata_scsi_error() is the current transportt->eh_strategy_handler()
+ for libata. As discussed above, this will be entered in two
+ cases - timeout and ATAPI error completion. This function
+ calls low level libata driver's eng_timeout() callback, the
+ standard callback for which is ata_eng_timeout(). It checks
+ if a qc is active and calls ata_qc_timeout() on the qc if so.
+ Actual error handling occurs in ata_qc_timeout().
+ </para>
+ <para>
+ If EH is invoked for timeout, ata_qc_timeout() stops BMDMA and
+ completes the qc. Note that as we're currently in EH, we
+ cannot call scsi_done. As described in SCSI EH doc, a
+ recovered scmd should be either retried with
+ scsi_queue_insert() or finished with scsi_finish_command().
+ Here, we override qc->scsidone with scsi_finish_command() and
+ calls ata_qc_complete().
+ </para>
+ <para>
+ If EH is invoked due to a failed ATAPI qc, the qc here is
+ completed but not deallocated. The purpose of this
+ half-completion is to use the qc as place holder to make EH
+ code reach this place. This is a bit hackish, but it works.
+ </para>
+ <para>
+ Once control reaches here, the qc is deallocated by invoking
+ __ata_qc_complete() explicitly. Then, internal qc for REQUEST
+ SENSE is issued. Once sense data is acquired, scmd is
+ finished by directly invoking scsi_finish_command() on the
+ scmd. Note that as we already have completed and deallocated
+ the qc which was associated with the scmd, we don't need
+ to/cannot call ata_qc_complete() again.
+ </para>
+
+ </sect1>
+
+ <sect1><title>Problems with the current EH</title>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ Error representation is too crude. Currently any and all
+ error conditions are represented with ATA STATUS and ERROR
+ registers. Errors which aren't ATA device errors are treated
+ as ATA device errors by setting ATA_ERR bit. Better error
+ descriptor which can properly represent ATA and other
+ errors/exceptions is needed.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ When handling timeouts, no action is taken to make device
+ forget about the timed out command and ready for new commands.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ EH handling via ata_scsi_error() is not properly protected
+ from usual command processing. On EH entrance, the device is
+ not in quiescent state. Timed out commands may succeed or
+ fail any time. pio_task and atapi_task may still be running.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Too weak error recovery. Devices / controllers causing HSM
+ mismatch errors and other errors quite often require reset to
+ return to known state. Also, advanced error handling is
+ necessary to support features like NCQ and hotplug.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ ATA errors are directly handled in the interrupt handler and
+ PIO errors in pio_task. This is problematic for advanced
+ error handling for the following reasons.
+ </para>
+ <para>
+ First, advanced error handling often requires context and
+ internal qc execution.
+ </para>
+ <para>
+ Second, even a simple failure (say, CRC error) needs
+ information gathering and could trigger complex error handling
+ (say, resetting &amp; reconfiguring). Having multiple code
+ paths to gather information, enter EH and trigger actions
+ makes life painful.
+ </para>
+ <para>
+ Third, scattered EH code makes implementing low level drivers
+ difficult. Low level drivers override libata callbacks. If
+ EH is scattered over several places, each affected callbacks
+ should perform its part of error handling. This can be error
+ prone and painful.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+ </sect1>
+ </chapter>
+
+ <chapter id="libataExt">
+ <title>libata Library</title>
+!Edrivers/ata/libata-core.c
+ </chapter>
+
+ <chapter id="libataInt">
+ <title>libata Core Internals</title>
+!Idrivers/ata/libata-core.c
+ </chapter>
+
+ <chapter id="libataScsiInt">
+ <title>libata SCSI translation/emulation</title>
+!Edrivers/ata/libata-scsi.c
+!Idrivers/ata/libata-scsi.c
+ </chapter>
+
+ <chapter id="ataExceptions">
+ <title>ATA errors and exceptions</title>
+
+ <para>
+ This chapter tries to identify what error/exception conditions exist
+ for ATA/ATAPI devices and describe how they should be handled in
+ implementation-neutral way.
+ </para>
+
+ <para>
+ The term 'error' is used to describe conditions where either an
+ explicit error condition is reported from device or a command has
+ timed out.
+ </para>
+
+ <para>
+ The term 'exception' is either used to describe exceptional
+ conditions which are not errors (say, power or hotplug events), or
+ to describe both errors and non-error exceptional conditions. Where
+ explicit distinction between error and exception is necessary, the
+ term 'non-error exception' is used.
+ </para>
+
+ <sect1 id="excat">
+ <title>Exception categories</title>
+ <para>
+ Exceptions are described primarily with respect to legacy
+ taskfile + bus master IDE interface. If a controller provides
+ other better mechanism for error reporting, mapping those into
+ categories described below shouldn't be difficult.
+ </para>
+
+ <para>
+ In the following sections, two recovery actions - reset and
+ reconfiguring transport - are mentioned. These are described
+ further in <xref linkend="exrec"/>.
+ </para>
+
+ <sect2 id="excatHSMviolation">
+ <title>HSM violation</title>
+ <para>
+ This error is indicated when STATUS value doesn't match HSM
+ requirement during issuing or execution any ATA/ATAPI command.
+ </para>
+
+ <itemizedlist>
+ <title>Examples</title>
+
+ <listitem>
+ <para>
+ ATA_STATUS doesn't contain !BSY &amp;&amp; DRDY &amp;&amp; !DRQ while trying
+ to issue a command.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; !DRQ during PIO data transfer.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ DRQ on command completion.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; ERR after CDB transfer starts but before the
+ last byte of CDB is transferred. ATA/ATAPI standard states
+ that &quot;The device shall not terminate the PACKET command
+ with an error before the last byte of the command packet has
+ been written&quot; in the error outputs description of PACKET
+ command and the state diagram doesn't include such
+ transitions.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ In these cases, HSM is violated and not much information
+ regarding the error can be acquired from STATUS or ERROR
+ register. IOW, this error can be anything - driver bug,
+ faulty device, controller and/or cable.
+ </para>
+
+ <para>
+ As HSM is violated, reset is necessary to restore known state.
+ Reconfiguring transport for lower speed might be helpful too
+ as transmission errors sometimes cause this kind of errors.
+ </para>
+ </sect2>
+
+ <sect2 id="excatDevErr">
+ <title>ATA/ATAPI device error (non-NCQ / non-CHECK CONDITION)</title>
+
+ <para>
+ These are errors detected and reported by ATA/ATAPI devices
+ indicating device problems. For this type of errors, STATUS
+ and ERROR register values are valid and describe error
+ condition. Note that some of ATA bus errors are detected by
+ ATA/ATAPI devices and reported using the same mechanism as
+ device errors. Those cases are described later in this
+ section.
+ </para>
+
+ <para>
+ For ATA commands, this type of errors are indicated by !BSY
+ &amp;&amp; ERR during command execution and on completion.
+ </para>
+
+ <para>For ATAPI commands,</para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; ERR &amp;&amp; ABRT right after issuing PACKET
+ indicates that PACKET command is not supported and falls in
+ this category.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; ERR(==CHK) &amp;&amp; !ABRT after the last
+ byte of CDB is transferred indicates CHECK CONDITION and
+ doesn't fall in this category.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; ERR(==CHK) &amp;&amp; ABRT after the last byte
+ of CDB is transferred *probably* indicates CHECK CONDITION and
+ doesn't fall in this category.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ Of errors detected as above, the followings are not ATA/ATAPI
+ device errors but ATA bus errors and should be handled
+ according to <xref linkend="excatATAbusErr"/>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>CRC error during data transfer</term>
+ <listitem>
+ <para>
+ This is indicated by ICRC bit in the ERROR register and
+ means that corruption occurred during data transfer. Up to
+ ATA/ATAPI-7, the standard specifies that this bit is only
+ applicable to UDMA transfers but ATA/ATAPI-8 draft revision
+ 1f says that the bit may be applicable to multiword DMA and
+ PIO.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>ABRT error during data transfer or on completion</term>
+ <listitem>
+ <para>
+ Up to ATA/ATAPI-7, the standard specifies that ABRT could be
+ set on ICRC errors and on cases where a device is not able
+ to complete a command. Combined with the fact that MWDMA
+ and PIO transfer errors aren't allowed to use ICRC bit up to
+ ATA/ATAPI-7, it seems to imply that ABRT bit alone could
+ indicate transfer errors.
+ </para>
+ <para>
+ However, ATA/ATAPI-8 draft revision 1f removes the part
+ that ICRC errors can turn on ABRT. So, this is kind of
+ gray area. Some heuristics are needed here.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ <para>
+ ATA/ATAPI device errors can be further categorized as follows.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>Media errors</term>
+ <listitem>
+ <para>
+ This is indicated by UNC bit in the ERROR register. ATA
+ devices reports UNC error only after certain number of
+ retries cannot recover the data, so there's nothing much
+ else to do other than notifying upper layer.
+ </para>
+ <para>
+ READ and WRITE commands report CHS or LBA of the first
+ failed sector but ATA/ATAPI standard specifies that the
+ amount of transferred data on error completion is
+ indeterminate, so we cannot assume that sectors preceding
+ the failed sector have been transferred and thus cannot
+ complete those sectors successfully as SCSI does.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>Media changed / media change requested error</term>
+ <listitem>
+ <para>
+ &lt;&lt;TODO: fill here&gt;&gt;
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>Address error</term>
+ <listitem>
+ <para>
+ This is indicated by IDNF bit in the ERROR register.
+ Report to upper layer.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>Other errors</term>
+ <listitem>
+ <para>
+ This can be invalid command or parameter indicated by ABRT
+ ERROR bit or some other error condition. Note that ABRT
+ bit can indicate a lot of things including ICRC and Address
+ errors. Heuristics needed.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ <para>
+ Depending on commands, not all STATUS/ERROR bits are
+ applicable. These non-applicable bits are marked with
+ &quot;na&quot; in the output descriptions but up to ATA/ATAPI-7
+ no definition of &quot;na&quot; can be found. However,
+ ATA/ATAPI-8 draft revision 1f describes &quot;N/A&quot; as
+ follows.
+ </para>
+
+ <blockquote>
+ <variablelist>
+ <varlistentry><term>3.2.3.3a N/A</term>
+ <listitem>
+ <para>
+ A keyword the indicates a field has no defined value in
+ this standard and should not be checked by the host or
+ device. N/A fields should be cleared to zero.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </blockquote>
+
+ <para>
+ So, it seems reasonable to assume that &quot;na&quot; bits are
+ cleared to zero by devices and thus need no explicit masking.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatATAPIcc">
+ <title>ATAPI device CHECK CONDITION</title>
+
+ <para>
+ ATAPI device CHECK CONDITION error is indicated by set CHK bit
+ (ERR bit) in the STATUS register after the last byte of CDB is
+ transferred for a PACKET command. For this kind of errors,
+ sense data should be acquired to gather information regarding
+ the errors. REQUEST SENSE packet command should be used to
+ acquire sense data.
+ </para>
+
+ <para>
+ Once sense data is acquired, this type of errors can be
+ handled similary to other SCSI errors. Note that sense data
+ may indicate ATA bus error (e.g. Sense Key 04h HARDWARE ERROR
+ &amp;&amp; ASC/ASCQ 47h/00h SCSI PARITY ERROR). In such
+ cases, the error should be considered as an ATA bus error and
+ handled according to <xref linkend="excatATAbusErr"/>.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatNCQerr">
+ <title>ATA device error (NCQ)</title>
+
+ <para>
+ NCQ command error is indicated by cleared BSY and set ERR bit
+ during NCQ command phase (one or more NCQ commands
+ outstanding). Although STATUS and ERROR registers will
+ contain valid values describing the error, READ LOG EXT is
+ required to clear the error condition, determine which command
+ has failed and acquire more information.
+ </para>
+
+ <para>
+ READ LOG EXT Log Page 10h reports which tag has failed and
+ taskfile register values describing the error. With this
+ information the failed command can be handled as a normal ATA
+ command error as in <xref linkend="excatDevErr"/> and all
+ other in-flight commands must be retried. Note that this
+ retry should not be counted - it's likely that commands
+ retried this way would have completed normally if it were not
+ for the failed command.
+ </para>
+
+ <para>
+ Note that ATA bus errors can be reported as ATA device NCQ
+ errors. This should be handled as described in <xref
+ linkend="excatATAbusErr"/>.
+ </para>
+
+ <para>
+ If READ LOG EXT Log Page 10h fails or reports NQ, we're
+ thoroughly screwed. This condition should be treated
+ according to <xref linkend="excatHSMviolation"/>.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatATAbusErr">
+ <title>ATA bus error</title>
+
+ <para>
+ ATA bus error means that data corruption occurred during
+ transmission over ATA bus (SATA or PATA). This type of errors
+ can be indicated by
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ ICRC or ABRT error as described in <xref linkend="excatDevErr"/>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Controller-specific error completion with error information
+ indicating transmission error.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ On some controllers, command timeout. In this case, there may
+ be a mechanism to determine that the timeout is due to
+ transmission error.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Unknown/random errors, timeouts and all sorts of weirdities.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ As described above, transmission errors can cause wide variety
+ of symptoms ranging from device ICRC error to random device
+ lockup, and, for many cases, there is no way to tell if an
+ error condition is due to transmission error or not;
+ therefore, it's necessary to employ some kind of heuristic
+ when dealing with errors and timeouts. For example,
+ encountering repetitive ABRT errors for known supported
+ command is likely to indicate ATA bus error.
+ </para>
+
+ <para>
+ Once it's determined that ATA bus errors have possibly
+ occurred, lowering ATA bus transmission speed is one of
+ actions which may alleviate the problem. See <xref
+ linkend="exrecReconf"/> for more information.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatPCIbusErr">
+ <title>PCI bus error</title>
+
+ <para>
+ Data corruption or other failures during transmission over PCI
+ (or other system bus). For standard BMDMA, this is indicated
+ by Error bit in the BMDMA Status register. This type of
+ errors must be logged as it indicates something is very wrong
+ with the system. Resetting host controller is recommended.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatLateCompletion">
+ <title>Late completion</title>
+
+ <para>
+ This occurs when timeout occurs and the timeout handler finds
+ out that the timed out command has completed successfully or
+ with error. This is usually caused by lost interrupts. This
+ type of errors must be logged. Resetting host controller is
+ recommended.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatUnknown">
+ <title>Unknown error (timeout)</title>
+
+ <para>
+ This is when timeout occurs and the command is still
+ processing or the host and device are in unknown state. When
+ this occurs, HSM could be in any valid or invalid state. To
+ bring the device to known state and make it forget about the
+ timed out command, resetting is necessary. The timed out
+ command may be retried.
+ </para>
+
+ <para>
+ Timeouts can also be caused by transmission errors. Refer to
+ <xref linkend="excatATAbusErr"/> for more details.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatHoplugPM">
+ <title>Hotplug and power management exceptions</title>
+
+ <para>
+ &lt;&lt;TODO: fill here&gt;&gt;
+ </para>
+
+ </sect2>
+
+ </sect1>
+
+ <sect1 id="exrec">
+ <title>EH recovery actions</title>
+
+ <para>
+ This section discusses several important recovery actions.
+ </para>
+
+ <sect2 id="exrecClr">
+ <title>Clearing error condition</title>
+
+ <para>
+ Many controllers require its error registers to be cleared by
+ error handler. Different controllers may have different
+ requirements.
+ </para>
+
+ <para>
+ For SATA, it's strongly recommended to clear at least SError
+ register during error handling.
+ </para>
+ </sect2>
+
+ <sect2 id="exrecRst">
+ <title>Reset</title>
+
+ <para>
+ During EH, resetting is necessary in the following cases.
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ HSM is in unknown or invalid state
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ HBA is in unknown or invalid state
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ EH needs to make HBA/device forget about in-flight commands
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ HBA/device behaves weirdly
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ Resetting during EH might be a good idea regardless of error
+ condition to improve EH robustness. Whether to reset both or
+ either one of HBA and device depends on situation but the
+ following scheme is recommended.
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ When it's known that HBA is in ready state but ATA/ATAPI
+ device is in unknown state, reset only device.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ If HBA is in unknown state, reset both HBA and device.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ HBA resetting is implementation specific. For a controller
+ complying to taskfile/BMDMA PCI IDE, stopping active DMA
+ transaction may be sufficient iff BMDMA state is the only HBA
+ context. But even mostly taskfile/BMDMA PCI IDE complying
+ controllers may have implementation specific requirements and
+ mechanism to reset themselves. This must be addressed by
+ specific drivers.
+ </para>
+
+ <para>
+ OTOH, ATA/ATAPI standard describes in detail ways to reset
+ ATA/ATAPI devices.
+ </para>
+
+ <variablelist>
+
+ <varlistentry><term>PATA hardware reset</term>
+ <listitem>
+ <para>
+ This is hardware initiated device reset signalled with
+ asserted PATA RESET- signal. There is no standard way to
+ initiate hardware reset from software although some
+ hardware provides registers that allow driver to directly
+ tweak the RESET- signal.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>Software reset</term>
+ <listitem>
+ <para>
+ This is achieved by turning CONTROL SRST bit on for at
+ least 5us. Both PATA and SATA support it but, in case of
+ SATA, this may require controller-specific support as the
+ second Register FIS to clear SRST should be transmitted
+ while BSY bit is still set. Note that on PATA, this resets
+ both master and slave devices on a channel.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>EXECUTE DEVICE DIAGNOSTIC command</term>
+ <listitem>
+ <para>
+ Although ATA/ATAPI standard doesn't describe exactly, EDD
+ implies some level of resetting, possibly similar level
+ with software reset. Host-side EDD protocol can be handled
+ with normal command processing and most SATA controllers
+ should be able to handle EDD's just like other commands.
+ As in software reset, EDD affects both devices on a PATA
+ bus.
+ </para>
+ <para>
+ Although EDD does reset devices, this doesn't suit error
+ handling as EDD cannot be issued while BSY is set and it's
+ unclear how it will act when device is in unknown/weird
+ state.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>ATAPI DEVICE RESET command</term>
+ <listitem>
+ <para>
+ This is very similar to software reset except that reset
+ can be restricted to the selected device without affecting
+ the other device sharing the cable.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>SATA phy reset</term>
+ <listitem>
+ <para>
+ This is the preferred way of resetting a SATA device. In
+ effect, it's identical to PATA hardware reset. Note that
+ this can be done with the standard SCR Control register.
+ As such, it's usually easier to implement than software
+ reset.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ <para>
+ One more thing to consider when resetting devices is that
+ resetting clears certain configuration parameters and they
+ need to be set to their previous or newly adjusted values
+ after reset.
+ </para>
+
+ <para>
+ Parameters affected are.
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ CHS set up with INITIALIZE DEVICE PARAMETERS (seldom used)
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Parameters set with SET FEATURES including transfer mode setting
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Block count set with SET MULTIPLE MODE
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Other parameters (SET MAX, MEDIA LOCK...)
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ ATA/ATAPI standard specifies that some parameters must be
+ maintained across hardware or software reset, but doesn't
+ strictly specify all of them. Always reconfiguring needed
+ parameters after reset is required for robustness. Note that
+ this also applies when resuming from deep sleep (power-off).
+ </para>
+
+ <para>
+ Also, ATA/ATAPI standard requires that IDENTIFY DEVICE /
+ IDENTIFY PACKET DEVICE is issued after any configuration
+ parameter is updated or a hardware reset and the result used
+ for further operation. OS driver is required to implement
+ revalidation mechanism to support this.
+ </para>
+
+ </sect2>
+
+ <sect2 id="exrecReconf">
+ <title>Reconfigure transport</title>
+
+ <para>
+ For both PATA and SATA, a lot of corners are cut for cheap
+ connectors, cables or controllers and it's quite common to see
+ high transmission error rate. This can be mitigated by
+ lowering transmission speed.
+ </para>
+
+ <para>
+ The following is a possible scheme Jeff Garzik suggested.
+ </para>
+
+ <blockquote>
+ <para>
+ If more than $N (3?) transmission errors happen in 15 minutes,
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ if SATA, decrease SATA PHY speed. if speed cannot be decreased,
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ decrease UDMA xfer speed. if at UDMA0, switch to PIO4,
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ decrease PIO xfer speed. if at PIO3, complain, but continue
+ </para>
+ </listitem>
+ </itemizedlist>
+ </blockquote>
+
+ </sect2>
+
+ </sect1>
+
+ </chapter>
+
+ <chapter id="PiixInt">
+ <title>ata_piix Internals</title>
+!Idrivers/ata/ata_piix.c
+ </chapter>
+
+ <chapter id="SILInt">
+ <title>sata_sil Internals</title>
+!Idrivers/ata/sata_sil.c
+ </chapter>
+
+ <chapter id="libataThanks">
+ <title>Thanks</title>
+ <para>
+ The bulk of the ATA knowledge comes thanks to long conversations with
+ Andre Hedrick (www.linux-ide.org), and long hours pondering the ATA
+ and SCSI specifications.
+ </para>
+ <para>
+ Thanks to Alan Cox for pointing out similarities
+ between SATA and SCSI, and in general for motivation to hack on
+ libata.
+ </para>
+ <para>
+ libata's device detection
+ method, ata_pio_devchk, and in general all the early probing was
+ based on extensive study of Hale Landis's probe/reset code in his
+ ATADRVR driver (www.ata-atapi.com).
+ </para>
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/librs.tmpl b/Documentation/DocBook/librs.tmpl
new file mode 100644
index 00000000..94f21361
--- /dev/null
+++ b/Documentation/DocBook/librs.tmpl
@@ -0,0 +1,289 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Reed-Solomon-Library-Guide">
+ <bookinfo>
+ <title>Reed-Solomon Library Programming Interface</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Thomas</firstname>
+ <surname>Gleixner</surname>
+ <affiliation>
+ <address>
+ <email>tglx@linutronix.de</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2004</year>
+ <holder>Thomas Gleixner</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ The generic Reed-Solomon Library provides encoding, decoding
+ and error correction functions.
+ </para>
+ <para>
+ Reed-Solomon codes are used in communication and storage
+ applications to ensure data integrity.
+ </para>
+ <para>
+ This documentation is provided for developers who want to utilize
+ the functions provided by the library.
+ </para>
+ </chapter>
+
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ None.
+ </para>
+ </chapter>
+
+ <chapter id="usage">
+ <title>Usage</title>
+ <para>
+ This chapter provides examples of how to use the library.
+ </para>
+ <sect1>
+ <title>Initializing</title>
+ <para>
+ The init function init_rs returns a pointer to an
+ rs decoder structure, which holds the necessary
+ information for encoding, decoding and error correction
+ with the given polynomial. It either uses an existing
+ matching decoder or creates a new one. On creation all
+ the lookup tables for fast en/decoding are created.
+ The function may take a while, so make sure not to
+ call it in critical code paths.
+ </para>
+ <programlisting>
+/* the Reed Solomon control structure */
+static struct rs_control *rs_decoder;
+
+/* Symbolsize is 10 (bits)
+ * Primitive polynomial is x^10+x^3+1
+ * first consecutive root is 0
+ * primitive element to generate roots = 1
+ * generator polynomial degree (number of roots) = 6
+ */
+rs_decoder = init_rs (10, 0x409, 0, 1, 6);
+ </programlisting>
+ </sect1>
+ <sect1>
+ <title>Encoding</title>
+ <para>
+ The encoder calculates the Reed-Solomon code over
+ the given data length and stores the result in
+ the parity buffer. Note that the parity buffer must
+ be initialized before calling the encoder.
+ </para>
+ <para>
+ The expanded data can be inverted on the fly by
+ providing a non-zero inversion mask. The expanded data is
+ XOR'ed with the mask. This is used e.g. for FLASH
+ ECC, where the all 0xFF is inverted to an all 0x00.
+ The Reed-Solomon code for all 0x00 is all 0x00. The
+ code is inverted before storing to FLASH so it is 0xFF
+ too. This prevents that reading from an erased FLASH
+ results in ECC errors.
+ </para>
+ <para>
+ The databytes are expanded to the given symbol size
+ on the fly. There is no support for encoding continuous
+ bitstreams with a symbol size != 8 at the moment. If
+ it is necessary it should be not a big deal to implement
+ such functionality.
+ </para>
+ <programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6];
+/* Initialize the parity buffer */
+memset(par, 0, sizeof(par));
+/* Encode 512 byte in data8. Store parity in buffer par */
+encode_rs8 (rs_decoder, data8, 512, par, 0);
+ </programlisting>
+ </sect1>
+ <sect1>
+ <title>Decoding</title>
+ <para>
+ The decoder calculates the syndrome over
+ the given data length and the received parity symbols
+ and corrects errors in the data.
+ </para>
+ <para>
+ If a syndrome is available from a hardware decoder
+ then the syndrome calculation is skipped.
+ </para>
+ <para>
+ The correction of the data buffer can be suppressed
+ by providing a correction pattern buffer and an error
+ location buffer to the decoder. The decoder stores the
+ calculated error location and the correction bitmask
+ in the given buffers. This is useful for hardware
+ decoders which use a weird bit ordering scheme.
+ </para>
+ <para>
+ The databytes are expanded to the given symbol size
+ on the fly. There is no support for decoding continuous
+ bitstreams with a symbolsize != 8 at the moment. If
+ it is necessary it should be not a big deal to implement
+ such functionality.
+ </para>
+
+ <sect2>
+ <title>
+ Decoding with syndrome calculation, direct data correction
+ </title>
+ <programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6];
+uint8_t data[512];
+int numerr;
+/* Receive data */
+.....
+/* Receive parity */
+.....
+/* Decode 512 byte in data8.*/
+numerr = decode_rs8 (rs_decoder, data8, par, 512, NULL, 0, NULL, 0, NULL);
+ </programlisting>
+ </sect2>
+
+ <sect2>
+ <title>
+ Decoding with syndrome given by hardware decoder, direct data correction
+ </title>
+ <programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6], syn[6];
+uint8_t data[512];
+int numerr;
+/* Receive data */
+.....
+/* Receive parity */
+.....
+/* Get syndrome from hardware decoder */
+.....
+/* Decode 512 byte in data8.*/
+numerr = decode_rs8 (rs_decoder, data8, par, 512, syn, 0, NULL, 0, NULL);
+ </programlisting>
+ </sect2>
+
+ <sect2>
+ <title>
+ Decoding with syndrome given by hardware decoder, no direct data correction.
+ </title>
+ <para>
+ Note: It's not necessary to give data and received parity to the decoder.
+ </para>
+ <programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6], syn[6], corr[8];
+uint8_t data[512];
+int numerr, errpos[8];
+/* Receive data */
+.....
+/* Receive parity */
+.....
+/* Get syndrome from hardware decoder */
+.....
+/* Decode 512 byte in data8.*/
+numerr = decode_rs8 (rs_decoder, NULL, NULL, 512, syn, 0, errpos, 0, corr);
+for (i = 0; i &lt; numerr; i++) {
+ do_error_correction_in_your_buffer(errpos[i], corr[i]);
+}
+ </programlisting>
+ </sect2>
+ </sect1>
+ <sect1>
+ <title>Cleanup</title>
+ <para>
+ The function free_rs frees the allocated resources,
+ if the caller is the last user of the decoder.
+ </para>
+ <programlisting>
+/* Release resources */
+free_rs(rs_decoder);
+ </programlisting>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="structs">
+ <title>Structures</title>
+ <para>
+ This chapter contains the autogenerated documentation of the structures which are
+ used in the Reed-Solomon Library and are relevant for a developer.
+ </para>
+!Iinclude/linux/rslib.h
+ </chapter>
+
+ <chapter id="pubfunctions">
+ <title>Public Functions Provided</title>
+ <para>
+ This chapter contains the autogenerated documentation of the Reed-Solomon functions
+ which are exported.
+ </para>
+!Elib/reed_solomon/reed_solomon.c
+ </chapter>
+
+ <chapter id="credits">
+ <title>Credits</title>
+ <para>
+ The library code for encoding and decoding was written by Phil Karn.
+ </para>
+ <programlisting>
+ Copyright 2002, Phil Karn, KA9Q
+ May be used under the terms of the GNU General Public License (GPL)
+ </programlisting>
+ <para>
+ The wrapper functions and interfaces are written by Thomas Gleixner.
+ </para>
+ <para>
+ Many users have provided bugfixes, improvements and helping hands for testing.
+ Thanks a lot.
+ </para>
+ <para>
+ The following people have contributed to this document:
+ </para>
+ <para>
+ Thomas Gleixner<email>tglx@linutronix.de</email>
+ </para>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/lsm.tmpl b/Documentation/DocBook/lsm.tmpl
new file mode 100644
index 00000000..fe7664ce
--- /dev/null
+++ b/Documentation/DocBook/lsm.tmpl
@@ -0,0 +1,265 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<article class="whitepaper" id="LinuxSecurityModule" lang="en">
+ <articleinfo>
+ <title>Linux Security Modules: General Security Hooks for Linux</title>
+ <authorgroup>
+ <author>
+ <firstname>Stephen</firstname>
+ <surname>Smalley</surname>
+ <affiliation>
+ <orgname>NAI Labs</orgname>
+ <address><email>ssmalley@nai.com</email></address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Timothy</firstname>
+ <surname>Fraser</surname>
+ <affiliation>
+ <orgname>NAI Labs</orgname>
+ <address><email>tfraser@nai.com</email></address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Chris</firstname>
+ <surname>Vance</surname>
+ <affiliation>
+ <orgname>NAI Labs</orgname>
+ <address><email>cvance@nai.com</email></address>
+ </affiliation>
+ </author>
+ </authorgroup>
+ </articleinfo>
+
+<sect1 id="Introduction"><title>Introduction</title>
+
+<para>
+In March 2001, the National Security Agency (NSA) gave a presentation
+about Security-Enhanced Linux (SELinux) at the 2.5 Linux Kernel
+Summit. SELinux is an implementation of flexible and fine-grained
+nondiscretionary access controls in the Linux kernel, originally
+implemented as its own particular kernel patch. Several other
+security projects (e.g. RSBAC, Medusa) have also developed flexible
+access control architectures for the Linux kernel, and various
+projects have developed particular access control models for Linux
+(e.g. LIDS, DTE, SubDomain). Each project has developed and
+maintained its own kernel patch to support its security needs.
+</para>
+
+<para>
+In response to the NSA presentation, Linus Torvalds made a set of
+remarks that described a security framework he would be willing to
+consider for inclusion in the mainstream Linux kernel. He described a
+general framework that would provide a set of security hooks to
+control operations on kernel objects and a set of opaque security
+fields in kernel data structures for maintaining security attributes.
+This framework could then be used by loadable kernel modules to
+implement any desired model of security. Linus also suggested the
+possibility of migrating the Linux capabilities code into such a
+module.
+</para>
+
+<para>
+The Linux Security Modules (LSM) project was started by WireX to
+develop such a framework. LSM is a joint development effort by
+several security projects, including Immunix, SELinux, SGI and Janus,
+and several individuals, including Greg Kroah-Hartman and James
+Morris, to develop a Linux kernel patch that implements this
+framework. The patch is currently tracking the 2.4 series and is
+targeted for integration into the 2.5 development series. This
+technical report provides an overview of the framework and the example
+capabilities security module provided by the LSM kernel patch.
+</para>
+
+</sect1>
+
+<sect1 id="framework"><title>LSM Framework</title>
+
+<para>
+The LSM kernel patch provides a general kernel framework to support
+security modules. In particular, the LSM framework is primarily
+focused on supporting access control modules, although future
+development is likely to address other security needs such as
+auditing. By itself, the framework does not provide any additional
+security; it merely provides the infrastructure to support security
+modules. The LSM kernel patch also moves most of the capabilities
+logic into an optional security module, with the system defaulting
+to the traditional superuser logic. This capabilities module
+is discussed further in <xref linkend="cap"/>.
+</para>
+
+<para>
+The LSM kernel patch adds security fields to kernel data structures
+and inserts calls to hook functions at critical points in the kernel
+code to manage the security fields and to perform access control. It
+also adds functions for registering and unregistering security
+modules, and adds a general <function>security</function> system call
+to support new system calls for security-aware applications.
+</para>
+
+<para>
+The LSM security fields are simply <type>void*</type> pointers. For
+process and program execution security information, security fields
+were added to <structname>struct task_struct</structname> and
+<structname>struct linux_binprm</structname>. For filesystem security
+information, a security field was added to
+<structname>struct super_block</structname>. For pipe, file, and socket
+security information, security fields were added to
+<structname>struct inode</structname> and
+<structname>struct file</structname>. For packet and network device security
+information, security fields were added to
+<structname>struct sk_buff</structname> and
+<structname>struct net_device</structname>. For System V IPC security
+information, security fields were added to
+<structname>struct kern_ipc_perm</structname> and
+<structname>struct msg_msg</structname>; additionally, the definitions
+for <structname>struct msg_msg</structname>, <structname>struct
+msg_queue</structname>, and <structname>struct
+shmid_kernel</structname> were moved to header files
+(<filename>include/linux/msg.h</filename> and
+<filename>include/linux/shm.h</filename> as appropriate) to allow
+the security modules to use these definitions.
+</para>
+
+<para>
+Each LSM hook is a function pointer in a global table,
+security_ops. This table is a
+<structname>security_operations</structname> structure as defined by
+<filename>include/linux/security.h</filename>. Detailed documentation
+for each hook is included in this header file. At present, this
+structure consists of a collection of substructures that group related
+hooks based on the kernel object (e.g. task, inode, file, sk_buff,
+etc) as well as some top-level hook function pointers for system
+operations. This structure is likely to be flattened in the future
+for performance. The placement of the hook calls in the kernel code
+is described by the "called:" lines in the per-hook documentation in
+the header file. The hook calls can also be easily found in the
+kernel code by looking for the string "security_ops->".
+
+</para>
+
+<para>
+Linus mentioned per-process security hooks in his original remarks as a
+possible alternative to global security hooks. However, if LSM were
+to start from the perspective of per-process hooks, then the base
+framework would have to deal with how to handle operations that
+involve multiple processes (e.g. kill), since each process might have
+its own hook for controlling the operation. This would require a
+general mechanism for composing hooks in the base framework.
+Additionally, LSM would still need global hooks for operations that
+have no process context (e.g. network input operations).
+Consequently, LSM provides global security hooks, but a security
+module is free to implement per-process hooks (where that makes sense)
+by storing a security_ops table in each process' security field and
+then invoking these per-process hooks from the global hooks.
+The problem of composition is thus deferred to the module.
+</para>
+
+<para>
+The global security_ops table is initialized to a set of hook
+functions provided by a dummy security module that provides
+traditional superuser logic. A <function>register_security</function>
+function (in <filename>security/security.c</filename>) is provided to
+allow a security module to set security_ops to refer to its own hook
+functions, and an <function>unregister_security</function> function is
+provided to revert security_ops to the dummy module hooks. This
+mechanism is used to set the primary security module, which is
+responsible for making the final decision for each hook.
+</para>
+
+<para>
+LSM also provides a simple mechanism for stacking additional security
+modules with the primary security module. It defines
+<function>register_security</function> and
+<function>unregister_security</function> hooks in the
+<structname>security_operations</structname> structure and provides
+<function>mod_reg_security</function> and
+<function>mod_unreg_security</function> functions that invoke these
+hooks after performing some sanity checking. A security module can
+call these functions in order to stack with other modules. However,
+the actual details of how this stacking is handled are deferred to the
+module, which can implement these hooks in any way it wishes
+(including always returning an error if it does not wish to support
+stacking). In this manner, LSM again defers the problem of
+composition to the module.
+</para>
+
+<para>
+Although the LSM hooks are organized into substructures based on
+kernel object, all of the hooks can be viewed as falling into two
+major categories: hooks that are used to manage the security fields
+and hooks that are used to perform access control. Examples of the
+first category of hooks include the
+<function>alloc_security</function> and
+<function>free_security</function> hooks defined for each kernel data
+structure that has a security field. These hooks are used to allocate
+and free security structures for kernel objects. The first category
+of hooks also includes hooks that set information in the security
+field after allocation, such as the <function>post_lookup</function>
+hook in <structname>struct inode_security_ops</structname>. This hook
+is used to set security information for inodes after successful lookup
+operations. An example of the second category of hooks is the
+<function>permission</function> hook in
+<structname>struct inode_security_ops</structname>. This hook checks
+permission when accessing an inode.
+</para>
+
+</sect1>
+
+<sect1 id="cap"><title>LSM Capabilities Module</title>
+
+<para>
+The LSM kernel patch moves most of the existing POSIX.1e capabilities
+logic into an optional security module stored in the file
+<filename>security/capability.c</filename>. This change allows
+users who do not want to use capabilities to omit this code entirely
+from their kernel, instead using the dummy module for traditional
+superuser logic or any other module that they desire. This change
+also allows the developers of the capabilities logic to maintain and
+enhance their code more freely, without needing to integrate patches
+back into the base kernel.
+</para>
+
+<para>
+In addition to moving the capabilities logic, the LSM kernel patch
+could move the capability-related fields from the kernel data
+structures into the new security fields managed by the security
+modules. However, at present, the LSM kernel patch leaves the
+capability fields in the kernel data structures. In his original
+remarks, Linus suggested that this might be preferable so that other
+security modules can be easily stacked with the capabilities module
+without needing to chain multiple security structures on the security field.
+It also avoids imposing extra overhead on the capabilities module
+to manage the security fields. However, the LSM framework could
+certainly support such a move if it is determined to be desirable,
+with only a few additional changes described below.
+</para>
+
+<para>
+At present, the capabilities logic for computing process capabilities
+on <function>execve</function> and <function>set*uid</function>,
+checking capabilities for a particular process, saving and checking
+capabilities for netlink messages, and handling the
+<function>capget</function> and <function>capset</function> system
+calls have been moved into the capabilities module. There are still a
+few locations in the base kernel where capability-related fields are
+directly examined or modified, but the current version of the LSM
+patch does allow a security module to completely replace the
+assignment and testing of capabilities. These few locations would
+need to be changed if the capability-related fields were moved into
+the security field. The following is a list of known locations that
+still perform such direct examination or modification of
+capability-related fields:
+<itemizedlist>
+<listitem><para><filename>fs/open.c</filename>:<function>sys_access</function></para></listitem>
+<listitem><para><filename>fs/lockd/host.c</filename>:<function>nlm_bind_host</function></para></listitem>
+<listitem><para><filename>fs/nfsd/auth.c</filename>:<function>nfsd_setuser</function></para></listitem>
+<listitem><para><filename>fs/proc/array.c</filename>:<function>task_cap</function></para></listitem>
+</itemizedlist>
+</para>
+
+</sect1>
+
+</article>
diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile
new file mode 100644
index 00000000..f9fd6154
--- /dev/null
+++ b/Documentation/DocBook/media/Makefile
@@ -0,0 +1,388 @@
+###
+# Media build rules - Auto-generates media contents/indexes and *.h xml's
+#
+
+SHELL=/bin/bash
+
+MEDIA_OBJ_DIR=$(objtree)/Documentation/DocBook/
+MEDIA_SRC_DIR=$(srctree)/Documentation/DocBook/media
+
+MEDIA_TEMP = media-entities.tmpl \
+ media-indices.tmpl \
+ videodev2.h.xml \
+ v4l2.xml \
+ audio.h.xml \
+ ca.h.xml \
+ dmx.h.xml \
+ frontend.h.xml \
+ net.h.xml \
+ video.h.xml \
+
+IMGFILES := $(patsubst %.b64,%, $(notdir $(shell ls $(MEDIA_SRC_DIR)/*.b64)))
+OBJIMGFILES := $(addprefix $(MEDIA_OBJ_DIR)/, $(IMGFILES))
+GENFILES := $(addprefix $(MEDIA_OBJ_DIR)/, $(MEDIA_TEMP))
+
+PHONY += cleanmediadocs
+
+cleanmediadocs:
+ -@rm `find $(MEDIA_OBJ_DIR) -type l` $(GENFILES) $(OBJIMGFILES) 2>/dev/null
+
+$(obj)/media_api.xml: $(GENFILES) FORCE
+
+#$(MEDIA_OBJ_DIR)/media_api.html: $(MEDIA_OBJ_DIR)/media_api.xml
+#$(MEDIA_OBJ_DIR)/media_api.pdf: $(MEDIA_OBJ_DIR)/media_api.xml
+#$(MEDIA_OBJ_DIR)/media_api.ps: $(MEDIA_OBJ_DIR)/media_api.xml
+
+V4L_SGMLS = \
+ $(shell ls $(MEDIA_SRC_DIR)/v4l/*.xml|perl -ne 'print "$$1 " if (m,.*/(.*)\n,)') \
+ capture.c.xml \
+ keytable.c.xml \
+ v4l2grab.c.xml
+
+DVB_SGMLS = \
+ $(shell ls $(MEDIA_SRC_DIR)/dvb/*.xml|perl -ne 'print "$$1 " if (m,.*/(.*)\n,)')
+
+MEDIA_SGMLS = $(addprefix ./,$(V4L_SGMLS)) $(addprefix ./,$(DVB_SGMLS)) $(addprefix ./,$(MEDIA_TEMP))
+
+FUNCS = \
+ close \
+ ioctl \
+ mmap \
+ munmap \
+ open \
+ poll \
+ read \
+ select \
+ write \
+
+IOCTLS = \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/videodev2.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/audio.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/ca.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/dmx.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/frontend.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([A-Z][^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/net.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/video.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/media.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/v4l2-subdev.h) \
+ VIDIOC_SUBDEV_G_FRAME_INTERVAL \
+ VIDIOC_SUBDEV_S_FRAME_INTERVAL \
+ VIDIOC_SUBDEV_ENUM_MBUS_CODE \
+ VIDIOC_SUBDEV_ENUM_FRAME_SIZE \
+ VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL \
+ VIDIOC_SUBDEV_G_SELECTION \
+ VIDIOC_SUBDEV_S_SELECTION \
+
+TYPES = \
+ $(shell perl -ne 'print "$$1 " if /^typedef\s+[^\s]+\s+([^\s]+)\;/' $(srctree)/include/uapi/linux/videodev2.h) \
+ $(shell perl -ne 'print "$$1 " if /^}\s+([a-z0-9_]+_t)/' $(srctree)/include/uapi/linux/dvb/frontend.h)
+
+ENUMS = \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/videodev2.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/audio.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/ca.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/dmx.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/frontend.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/net.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/video.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/media.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-mediabus.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-subdev.h)
+
+STRUCTS = \
+ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/videodev2.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s\{]+)\s*/)' $(srctree)/include/uapi/linux/dvb/audio.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/ca.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/dmx.h) \
+ $(shell perl -ne 'print "$$1 " if (!/dtv\_cmds\_h/ && /^struct\s+([^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/frontend.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([A-Z][^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/net.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/video.h) \
+ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/media.h) \
+ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-subdev.h) \
+ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-mediabus.h)
+
+ERRORS = \
+ E2BIG \
+ EACCES \
+ EAGAIN \
+ EBADF \
+ EBADFD \
+ EBADR \
+ EBADRQC \
+ EBUSY \
+ ECHILD \
+ ECONNRESET \
+ EDEADLK \
+ EDOM \
+ EEXIST \
+ EFAULT \
+ EFBIG \
+ EILSEQ \
+ EINIT \
+ EINPROGRESS \
+ EINTR \
+ EINVAL \
+ EIO \
+ EMFILE \
+ ENFILE \
+ ENOBUFS \
+ ENODATA \
+ ENODEV \
+ ENOENT \
+ ENOIOCTLCMD \
+ ENOMEM \
+ ENOSPC \
+ ENOSR \
+ ENOSYS \
+ ENOTSUP \
+ ENOTSUPP \
+ ENOTTY \
+ ENXIO \
+ EOPNOTSUPP \
+ EOVERFLOW \
+ EPERM \
+ EPIPE \
+ EPROTO \
+ ERANGE \
+ EREMOTE \
+ EREMOTEIO \
+ ERESTART \
+ ERESTARTSYS \
+ ESHUTDOWN \
+ ESPIPE \
+ ETIME \
+ ETIMEDOUT \
+ EUSERS \
+ EWOULDBLOCK \
+ EXDEV \
+
+ESCAPE = \
+ -e "s/&/\\&amp;/g" \
+ -e "s/</\\&lt;/g" \
+ -e "s/>/\\&gt;/g"
+
+FILENAME = \
+ -e s,"^[^\/]*/",, \
+ -e s/"\\.xml"// \
+ -e s/"\\.tmpl"// \
+ -e s/\\\./-/g \
+ -e s/"^func-"// \
+ -e s/"^pixfmt-"// \
+ -e s/"^vidioc-"//
+
+# Generate references to these structs in videodev2.h.xml.
+DOCUMENTED = \
+ -e "s/\(enum *\)v4l2_mpeg_cx2341x_video_\([a-z]*_spatial_filter_type\)/\1<link linkend=\"\2\">v4l2_mpeg_cx2341x_video_\2<\/link>/g" \
+ -e "s/\(\(enum\|struct\) *\)\(v4l2_[a-zA-Z0-9_]*\)/\1<link linkend=\"\3\">\3<\/link>/g" \
+ -e "s/\(V4L2_PIX_FMT_[A-Z0-9_]\+\) /<link linkend=\"\1\">\1<\/link> /g" \
+ -e ":a;s/\(linkend=\".*\)_\(.*\">\)/\1-\2/;ta" \
+ -e "s/v4l2\-mpeg\-vbi\-ITV0/v4l2-mpeg-vbi-itv0-1/g"
+
+DVB_DOCUMENTED = \
+ -e "s/\(linkend\=\"\)FE_SET_PROPERTY/\1FE_GET_PROPERTY/g" \
+ -e "s,\(struct\s\+\)\([a-z0-9_]\+\)\(\s\+{\),\1\<link linkend=\"\2\">\2\<\/link\>\3,g" \
+ -e "s,\(}\s\+\)\([a-z0-9_]\+_t\+\),\1\<link linkend=\"\2\">\2\<\/link\>,g" \
+ -e "s,\(define\s\+\)\(DTV_[A-Z0-9_]\+\)\(\s\+[0-9]\+\),\1\<link linkend=\"\2\">\2\<\/link\>\3,g" \
+ -e "s,<link\s\+linkend=\".*\">\(DTV_IOCTL_MAX_MSGS\|dtv_cmds_h\|__.*_old\)<\/link>,\1,g" \
+ -e ":a;s/\(linkend=\".*\)_\(.*\">\)/\1-\2/;ta" \
+ -e "s,\(audio-mixer\|audio-karaoke\|audio-status\|ca-slot-info\|ca-descr-info\|ca-caps\|ca-msg\|ca-descr\|ca-pid\|dmx-filter\|dmx-caps\|video-system\|video-highlight\|video-spu\|video-spu-palette\|video-navi-pack\)-t,\1,g" \
+ -e "s,DTV-ISDBT-LAYER[A-C],DTV-ISDBT-LAYER,g" \
+ -e "s,\(define\s\+\)\([A-Z0-9_]\+\)\(\s\+_IO\),\1\<link linkend=\"\2\">\2\<\/link\>\3,g" \
+ -e "s,<link\s\+linkend=\".*\">\(__.*_OLD\)<\/link>,\1,g" \
+
+#
+# Media targets and dependencies
+#
+
+install_media_images = \
+ $(Q)cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api
+
+$(MEDIA_OBJ_DIR)/%: $(MEDIA_SRC_DIR)/%.b64
+ $(Q)base64 -d $< >$@
+
+$(MEDIA_OBJ_DIR)/v4l2.xml: $(OBJIMGFILES)
+ @$($(quiet)gen_xml)
+ @(ln -sf $(MEDIA_SRC_DIR)/v4l/*xml $(MEDIA_OBJ_DIR)/)
+ @(ln -sf $(MEDIA_SRC_DIR)/dvb/*xml $(MEDIA_OBJ_DIR)/)
+
+$(MEDIA_OBJ_DIR)/videodev2.h.xml: $(srctree)/include/uapi/linux/videodev2.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/audio.h.xml: $(srctree)/include/uapi/linux/dvb/audio.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/ca.h.xml: $(srctree)/include/uapi/linux/dvb/ca.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/dmx.h.xml: $(srctree)/include/uapi/linux/dvb/dmx.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/frontend.h.xml: $(srctree)/include/uapi/linux/dvb/frontend.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/net.h.xml: $(srctree)/include/uapi/linux/dvb/net.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/video.h.xml: $(srctree)/include/uapi/linux/dvb/video.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/media-entities.tmpl: $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<!-- Generated file! Do not edit. -->") >$@
+ @( \
+ echo -e "\n<!-- Functions -->") >>$@
+ @( \
+ for ident in $(FUNCS) ; do \
+ entity=`echo $$ident | tr _ -` ; \
+ echo "<!ENTITY func-$$entity \"<link" \
+ "linkend='func-$$entity'><function>$$ident()</function></link>\">" \
+ >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Ioctls -->") >>$@
+ @( \
+ for ident in $(IOCTLS) ; do \
+ entity=`echo $$ident | tr _ -` ; \
+ id=`grep "<refname>$$ident" $(MEDIA_OBJ_DIR)/vidioc-*.xml $(MEDIA_OBJ_DIR)/media-ioc-*.xml | sed -r s,"^.*/(.*).xml.*","\1",` ; \
+ echo "<!ENTITY $$entity \"<link" \
+ "linkend='$$id'><constant>$$ident</constant></link>\">" \
+ >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Types -->") >>$@
+ @( \
+ for ident in $(TYPES) ; do \
+ entity=`echo $$ident | tr _ -` ; \
+ echo "<!ENTITY $$entity \"<link" \
+ "linkend='$$entity'>$$ident</link>\">" >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Enums -->") >>$@
+ @( \
+ for ident in $(ENUMS) ; do \
+ entity=`echo $$ident | sed -e "s/v4l2_mpeg_cx2341x_video_\([a-z]*_spatial_filter_type\)/\1/" | tr _ -` ; \
+ echo "<!ENTITY $$entity \"enum&nbsp;<link" \
+ "linkend='$$entity'>$$ident</link>\">" >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Structures -->") >>$@
+ @( \
+ for ident in $(STRUCTS) ; do \
+ entity=`echo $$ident | tr _ - | sed s/v4l2-mpeg-vbi-ITV0/v4l2-mpeg-vbi-itv0-1/g` ; \
+ echo "<!ENTITY $$entity \"struct&nbsp;<link" \
+ "linkend='$$entity'>$$ident</link>\">" >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Error Codes -->") >>$@
+ @( \
+ for ident in $(ERRORS) ; do \
+ echo "<!ENTITY $$ident \"<errorcode>$$ident</errorcode>" \
+ "error code\">" >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Subsections -->") >>$@
+ @( \
+ for file in $(MEDIA_SGMLS) ; do \
+ entity=`echo "$$file" | sed $(FILENAME) -e s/"^([^-]*)"/sub\1/` ; \
+ if ! echo "$$file" | \
+ grep -q -E -e '^(func|vidioc|pixfmt)-' ; then \
+ echo "<!ENTITY sub-$$entity SYSTEM \"$$file\">" >>$@ ; \
+ fi ; \
+ done)
+ @( \
+ echo -e "\n<!-- Function Reference -->") >>$@
+ @( \
+ for file in $(MEDIA_SGMLS) ; do \
+ if echo "$$file" | \
+ grep -q -E -e '(func|vidioc|pixfmt)-' ; then \
+ entity=`echo "$$file" |sed $(FILENAME)` ; \
+ echo "<!ENTITY $$entity SYSTEM \"$$file\">" >>$@ ; \
+ fi ; \
+ done)
+
+# Jade can auto-generate a list-of-tables, which includes all structs,
+# but we only want data types, all types, and sorted please.
+$(MEDIA_OBJ_DIR)/media-indices.tmpl: $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<!-- Generated file! Do not edit. -->") >$@
+ @( \
+ echo -e "\n<index><title>List of Types</title>") >>$@
+ @( \
+ for ident in $(TYPES) ; do \
+ id=`echo $$ident | tr _ -` ; \
+ echo "<indexentry><primaryie><link" \
+ "linkend='$$id'>$$ident</link></primaryie></indexentry>" >>$@ ; \
+ done)
+ @( \
+ for ident in $(ENUMS) ; do \
+ id=`echo $$ident | sed -e "s/v4l2_mpeg_cx2341x_video_\([a-z]*_spatial_filter_type\)/\1/" | tr _ -`; \
+ echo "<indexentry><primaryie>enum&nbsp;<link" \
+ "linkend='$$id'>$$ident</link></primaryie></indexentry>" >>$@ ; \
+ done)
+ @( \
+ for ident in $(STRUCTS) ; do \
+ id=`echo $$ident | tr _ - | sed s/v4l2-mpeg-vbi-ITV0/v4l2-mpeg-vbi-itv0-1/g` ; \
+ echo "<indexentry><primaryie>struct&nbsp;<link" \
+ "linkend='$$id'>$$ident</link></primaryie></indexentry>" >>$@ ; \
+ done)
+ @( \
+ echo "</index>") >>$@
+
diff --git a/Documentation/DocBook/media/bayer.png.b64 b/Documentation/DocBook/media/bayer.png.b64
new file mode 100644
index 00000000..ccdf2bcd
--- /dev/null
+++ b/Documentation/DocBook/media/bayer.png.b64
@@ -0,0 +1,171 @@
+iVBORw0KGgoAAAANSUhEUgAAAlgAAACqCAMAAABGfcHVAAAAAXNSR0IArs4c6QAAAwBQTFRFAAIA
+CAICAAQVEQEBAgsAJgECAAogAwsTAQopHQYBNAEAAAxNARQAERIQAhoDABwAABZEHRQKGRYKQw0F
+ACMBACUAERwpHR4cVRAFBR5rZhADACR2JiIhBDAGAiWGgQ4AcxQABDYACSeQMSYlJykmESxYlQ4A
+PSYZIS05OSsJHS5JOC8kAEMDUC8SADXLNDUzADbEAEsAADX/2RABCFIAAD/qxB0AAD//BFgAK0Vp
+WT4r3hwA3RsTRERAAEf/5CIA2iYCCUv+WUgz7iIAOk5g3CgVSU5SiD8uB2sABm8AE1X/U1RQOFyL
+4jkfIlz/RV98M1j+G2H/fVk23jtD4T0pXl9ieFtGcV894UIiYWJfAIwA50gOV2p+4kssO2j+dGZx
+bG1qVmj/OHH/aHJzfnBX5lQ7B50AZnahdXd0AKUG5V1ARnz/6mErCqgAAKsAent46GBIW4GhAK0A
+AK8B42FtALIOin9/ALUAiIOBALkAVIf/6WxWg4eBi4SKJrEAmoVtdY2geoP/rYVXhoyOqYVuJbUh
+IrgWX5D/jo6J7nszP7gAsI9S63xnN70zZqO/fZzCOb4+cZr+64dy8otYnJ6b7ImDRcM56IqcWMEo
+oJb/N8ZoTMRL7Y9/QchcsaOTo6eohaj/7ZqKXspXj6v9xal+oK+7d7vTUM+Afco5r7CumLTVStKV
+bs9ukbb/9qx/9q9l8queoLv/e9R66beG7rDImNRhi9aDwsPAs8bWzcK2cd67jtqP5MWUodyB8b+1
+tMr/z8L/j9+kbOXWnN2ZstD7yc7Rzs7Ly9xb183UwdD/+si/qeOmvuKIx9fj4tPCtuWiqOrL+tS2
+y9v++NPK2dvZt+m0ueq80+Wo3OeSwuy/yezG+d7f/eS/z/DS3uf/6Ono4PC71O39xPb02vPZ/+nR
++Ori6e399+vt+PGz+ur65fL55/Xb4vbh7ffX/PPY8vP9+vLy6Pf36fjr/PfM8vjr//f+/vn48P36
+9vv+/vzf+fv4/fvu//z7+v7//P/7/v/8//QpxAAAAAFiS0dEAIgFHUgAAAAJcEhZcwAAFY8AABWW
+AQ2TT8cAAAAHdElNRQfaCRQXGSltwbPRAAAgAElEQVR42u2dDXwU1bXAZwEJtEaNH1nbh68fpoWK
+iE1ao2Bgo9RqIrEg+BIFmqLYLOlMcHHlU6DiQmrJM2jKo0QIBHgUjD5ETcQIlKq0gKDmA+UjiRAT
+BCOBkGzC5re/++6987Ezszszdzc7s9jfPa2wO+zMPefc/5575t67Z5hB/0Ek/W668xckcmVmQZ5S
+CvLmgshl4QCiZu+8ntCOgWlzVfrl5ZZFrl6T/VYSv9x5K3Pj9wnkh9fFFxQE6VcVqXY+8PjgH5K0
++/0bBxDaYcsN0i+vLlTbzH9kjEknkEF3zptjLPPmXL2VwGC/nxysm+YRyc+/S2bHNYUgmtJkf5RI
+vScH3HEvifz05mhqB8G68d6xJO3ecSWhHXYfYdvM99LHGEv6mEF3zmFJ5Gr49e9qVUh7O/wP/w/9
+gf4EXnKwbpjNGQs779bvktlxzULg7TCQzvDAItBvzqMD7hjrMJaxPx0Cv3OdBvqFBRZJs46xCCwi
+O+xNwNfSclom6F2L4j1A/UsG1hgI1jyWUzLEKf/gX0CwevIzsvSlJoyh8IY5LmPhEFhEhsCI9b7L
+oy/uI2GBRaDfPATWaGO596dDADhioJ+7PKyI5SBoF4NFZAcEa6ZjvL7MOg9MAWtPxv4aHdlfM315
+TMHy7Gg4pifN5cUxBMsPisub9dRrqHc1xBCsC7vHH6jVlQOO3eGBhccc9B+rGIWkP/ALBNYEA3uX
+xxasooMGbVaWxhSs0kr9Njs8zbEE60C2UbOTTAOrR6/ZHjB/ZWzBet+gzR0xBmuHfpttsQbLIEP2
+ZpsGVrsBWMspWBQsGrEoWFEAK1UUDbBkQEkJu+Ko+WDxDRmApWmH+WCF0u/bCFYIMyIHK30CL1kZ
+Y1J17wo51snhW1/4d9BdoZlgcZx7mcezzM1yemBp22E2WBzL66fsExVYjmxBxsNed1gHVra8XX2w
+WBc2A/4dDbCSp4v/2PrGb1L1hkKnZ8sRNFH39cel6K1lQyFbvLcZXf3YrmWsNlg6dpgMFltc3dAN
+j3+zazWrCVbKBun8ltcfS3FYBpb0D721L+uCxXoqxO5VfEMiBmsa6BL/+UxWqhZYMFytPSVd5yMU
+qKxJ3jlub7f4D5f+xmqDpW2HuWCxr0r69b7N6oAV6JsTj6VYBpaciP9L0QaLVXQv13ewUqeBdjyS
+ZM0/Cf6uBRbkak03uLSraBHnWfsJAJ/LEi2TIxZs7bPyZS6XZwu0XEaWCiwdO0wFi3sXgC/K4QDi
+qfhEoV8QWNtT8FLK+L90gddHWwjWjNGw1dG/mgW7/jFNsFjYvd/sKnK73Kh7P4oSWHw3JOcDkJGq
+BVbxBfD5IidKqpzOV/3gb05rwGJfRXEAfYM41nMKfMXpgaVhh5lgsVsAeJvj9YOMXVrE6YAlvHwa
+XJSFLJPBOg8m8W2lpLwFQ5YjNFgc6t45OFCx0OVgNRu1iIVEByznu+ArIUixnPMfKGRZARaCSRpf
+ENx/4wwiVgg7TASLc52CA4f4BiobCFmaYDlSusBUC8GaGgC6VgssFnavS3QtC7uXiyJYMP09o5m8
+O2GfOsW8il1TudoisF4FX8hGvy3lc1yGYAXZYSZYa+RBitvy9hyXIVij744RWP+jDRb8ygaCFLdm
+x7KoJO/tyWj2Jz3/JPhjssY8lnNL91cvsNL8KOtk1fNY5iTv3D/AP2UJMaubvGvZYSJY8Jv+T04+
+8eAyBCsFdvBXVg6F2UK7k85oDoUs7N5FsiwjSsk7v5cKkqHsD3nEcm4BnznxHINTENaaCVJpcBGn
+zXQilpYdZoL1iThSB+kXBNbu8VOhzFhwAICXrUzeF2RPnTpp6qy/nAG9YzWSd5gpfqZhRl/AkpjY
+P0HrrtBZDQ468ZuKHVgqXdYk793Ag4zkllXyDZfq5FhadpgJVjMoxZ3g3sHrV84ZzmMB8LpjdCym
+G3r/oDXdwFaD97EZHG9FxQ53VHKsadOh5K8/q51jYbDwC/FSiywFixX7/Sirk2Np2GEmWA2gHOvn
+Efe3aCfvXiTA27J9lpVLOl7cLvyH2g2PaU6QSmCx4mXcXDTASkaSmpxxEvw1VXsofBLPt79/9AgU
+2DJr5VDIFh2rh9IM6vXA0rDDgqGQW4b1awAN+neFvzoDvpTPjlqVvD8Nw+ToFG2wxKGQO3gUmnEk
+GmAlS/M/Y5KXg5pkLbD45F3IsdhgsExO3vHS5JMV2mDp2GFJ8o71KzYCK+VhSJYjxXKw4A0DeF0P
+LDF5xxOVXLQiltg384PAktaanxSmG+D9AkrtEFhWLEKzr4Jv+FsUNOizO/QjloYd5k439C6SVIID
+doPRPNbTXeA96yPW6JS3AFCkWMrpBg/qXmmYcEcbLO2IxTrfBRdfcAqYOZ1WDYVozvGf0s2vkxAs
+6yIWGqs/l9ZsnWtBsxFYKHa8bOEitDiPBQfhc49prhWyqHuliWhX1HIsvI1JL8eCMJ0CF1ezeBxk
+iz+xLMdCSyYfzRZugbd0gCO6OVZoO0xd0lnTDT57QdiktqYDtBnOvMPYcc7CRWhpghSmWW9qgoVW
+EC6u5uMGh7s3KmBNQzJ9+UnQpTnzzjmLTwHwRUVxcemWBnjnusuqRWi0ctX5cXlR8dq9HQB8s1pv
+SUfDDlMjFkxPQO/H5auKy/e2of0XhmuFKQ93gTctHwpHO1ColA+GqkVovntXFQndG5WZd0m6fqe9
+bYZzej6RPvjZ6qAJUtP2vLNrpP0c53bNYXVm3rXsMHnbzFrJL727XtCbIA0srYA/pVg33SAu6dx9
+BpyQ3Teot80oujc6E6TtWBpr1mfobPRDUrzlSEfH10d3FcEbBws3+rnX7m3o6Pjm43K9jX46dpi8
+0Y9zFe891tHZ/HHFMo5zEawV/uo4+HKsVWCdli1C+2F2p7nRj+OK+O7dUeRio7vnPdVoazIr3/Ru
+4dZkce2bI9vznmr51mRh2wd72e95T9HdmhzKDEt+paP4MQX9+Rf9lU60wKI//6JgUbAoWJczWMRF
+QehQSMEyJWJNM7B3eYwj1re8KEhnjMGaZNSsaUVB0tcrZaPqbVaMyxiVVlcqRP22KLZljEqLlApV
+q97uiG0ZowOOVzboyitmlTECK6fly2V6fr7qfXtMwTpaVKyUUtX74uYYggVAs1o9lX5F1SCGYDWB
+l2bMVMos5dsZL4HwwTIQFwYrmmICWNEUM8CKnpgDFpmEAVZQM263+shsl1ZxWz/6H/oD/ukPC6x5
+s42L6s4mrEFqClgkRX8hWPeONRYzwBpN0i4Ci8iOkGB5Q7xjbP2CZGDwoX62K29Qy/U33RB8bEDS
+SLUkpfUlYjE3EMmVIewIJTZ7sH4FfQHrqhuuV8tNQUduuJrpTyQ228hg/UoiByuXsN3+A64OtiPE
+kauYEP0bslw4c9MD9xPIA9d/5wc/JJH+uWUlaunL6Di3P1GzPxhMaMfV920N0q8qcvVO27/34/80
+lh9/b8D9D5DIz+3B7ivZFzlYv73+AaKG7x9AaEd8YbB+IUdH5hdkddR/9H2iOuX3XrE1ujnW3O+Q
+tXsdqR3PRnko/GUGQXX5jNsYjki9B5JIWvWSg3UrmVtY5jYSO9J/SV7n/efzOJKsDYI1mkSugOGp
+7ai+HAsLrLEE2afj3uvI7JhzEwTrgJGEA9ZtRPXlbx/wJMlNCA/WfgNpB/4wwCJyy5PM7UQ56u0w
+x2o7YtC/bSaB1eZx6xcqd9XHFKyXpLpnGuLYQBwTog+WF7wmlo3TkIzp7SB2YJ027F63p80csOoX
+dXR3aksHKC2PKVjZG8BpPQEvzYgpWPkrhd1koaWnJqMmhmCdqXd3dOpJd4e73hywjngM7C2viClY
+M7YbtPnKrFiDpSutWY0xBcuoe4HHNLC6KVgmgtUYa7AM8ncfBYuCRcGiYH3rwRJ+UKYLVookVoLl
+0Gw3FFgh7TAZrNRkQVKNwVKXCLIIrNBuUYKlZUZfwOJYd3FpeemqZawOWI4VCwSZ6bAyYk0V2501
+VVnzIBgsDTvMBSt1+vL5WPKVtZNCgMW6iqB6pcs41lKwtNyiACt1gmjGNHWZ/IjBYj17+T0jX+9a
+xGqCNT5wlZbXrQMrJUBEb+0f5D9NDwJLyw5zwUreLx4/80Z6qg5YrGvLMeykznplPXiTwZLc8o3K
+LQqwkqX9cl5VdbGIwWLXXIDGNjc0dwBwSfFLRWXEAoB/NN3xLgBetw6sDeC00C5UT/5LXjVYmnaY
+DNYe0IoeydgIe75GBywO/SC0t62hARXpV5S7NhcsdouWW9RgdfFm+EGXskx+hGCxW/yoTjnHch6o
+wsUXdMDi053Rk94CQFFewmSwtgsp1oIz4M2xmmBp22E6WCtxapK+shv8MVUTLM8p0LurCFVRXauq
+B28qWKj2hcwti3TAqklORRlWvrpMfoRgeU6Cz4VfvqLyDB+x2mA5UCV62OV3v6V8xoHpYOHC+6ic
+9CUZ0CqwtO0wHaz1yWPSUfb7GngjWQss9l0UL4QSVKgevEVgofrtvFtw9Y1drA5YqenIjuT5UqGx
+voCFCnzPFltzvgo+l1XADwZL6Oy/SHUIrAFLfH0azNACS8cOK8DCr1aCPVpgscXdgSjFek71yoqH
+mAkWrt+u4ZbgiKWuYNcXsIrlNe9dntJlLpcxWG8pC+JYBdbDivroSrB07LAALFw9acwH2kMh7ODP
+ZflN6arZ1kQsPbeEAAvbsTIaQyG79pQ8HXEpCnyHzrFSUHGJP8Ugx4Ij8InHNHIsPTtMB2vjmIyM
+jKzpe5QdohwK/6GsB29R8q7rliCwxmRBM6at7z7zm2iABb7RLPCtBmsFlld2A/CplXeFtQtwuxvO
+AHmxFDVY2naYDpbU+2O0wTolPPmBcwbVgzcVLB23aEw3gK7fJfd9uoEtB8f4Osw7ULnc+vpjHlYL
+rIDjP1UW/jUZrIC8PFoTLB07LAML7E/XBMsnlBUv4tU7uoO1BKwK0S2VQrsezhAs0Pi71KiB5XaK
+v6srZnXnsbygd/tMVWFnk8FqOYAnsb58KVt75l3PDvNzrFS0E3nCym7FWKgEqxsUadSrNxUsyS1t
+wW4JcVcIBT2VrysKEWut/yIfossr0SMJOsEqVjfHelo9O2pRjvUW+FJZ9Fc9FGrbYdFdYWry/G4g
+G0XUQyFOojkPUq/iiKxIr7lDodotRazBXWFqctZJ8NfkKCTvwnQsXw65Qw8sNI/FFwxPsRYs9BzH
+46D3MZ2IpWOHVdMNY1JrwHwNsHTq1ZsJFgfd8oLCLYZgwZfrFfNxkc5jfSKfS2QNwBIKhv/J4oiF
+XkxCFTS1F6F17LAMrGRtsFhUDz6g7A6LwFK5hbMQLG4NWl/gxJKMHXo5Ft+vdx9XFQy3BCx+ENZe
+hNaxwyqwUtNPakcszyk0A87x6jmrZWXFzQULAh1wC8z0VhmClZr6RjTAQlN34O1l+HET7jUNQIa0
+BlgpDwNFOWmrJkhhqOzVWYTWtsOatcLk5DGvgTOy/Q2qJZ21F8AXq92ouoq7aK8ffMxatFao7ZZg
+sPj9WMv9QHFbGCFYnAs23ftxZcWOgx3oOezGM+9BT8+waOYdDoafai9Ca9thOlh7lq+Esr4GKJJe
+1SI03nzxBVSvGpW9/uwFa5Z0VG659LbOPFYjNmPlHgD+nhyV/VicVEi996NlrM5+LLG3YQ9flG+6
+Mxms3YFnGsufIBm0H0vLDqv2YwGwUXc/VvFe8XNflLo4y/ZjabpFcx5rf3qUdpCyruLqg0cOVpe7
+We2Nfo7aA9Ja4YLa2plWgbXi+EvSIvT22t1jdXaQathhMljra/BPlfe8sVK5jSloBynLeir2HqlH
+5eBZ6/ZjSW6pVLtFCVa+YMaejdNTo73nnTXY8x76tfl73h2ybfcke97Z2Ox5Tybd887FZs87S7bn
+PWjTu9m/0nE4ZC8dlu15d2i1e9n8SkeonfFt/5VOuoYd9Odf9OdffQKL/q6QgkXBomBRsPoKlo+C
+9e8MllGzZoFV7+4EPm3pBqWxBSt7A/DqyWVQxqirpwv+H/6BRfybF9AY4zJGHt3u9YFOs8BqVlfi
+KlIXXjsYU7BWOCYpC61NUr6f5NhArJ4ZYK1Pn6astKZ6mzWtNYZgnf7aYyjNPFizSeowQ7DGkgiq
+QdpWf0QhR5Vv64+CcMAiaheBRWZHqFKRu1UCog7WbQOcROWucanID5RSs3+PUlrDKhVJ5BYnQ2iH
+vQl8repetdTj/ZXMreyTBML+6EbHHSRyRYmv6fQZlYDI5ZnvELU7+joyO5w3PXO+6YJKuiNXr8l+
++5hfGkv67cyjThI3329vamrqVYu61TCK2/6IzC2PwohFYAeMWB8Gd29IdZgBVwbJVVcFHxtgG0wk
+tiH2IBnZB7BKCNu9NpQdwYeuZOKD1IvP7QNYSf0GBsmg4EP9mBC6XB3iWLB69viIn3ngA8+GajeU
+MKR2BOtnD13nPbNuH4HUjcwl+ty+pMLgz9X1BayRZPpl9sGOPujXNKSs7kNjqSuzV5HoV1eYFOJo
+U+Rg5RK6pcreBztCTgIwhF/XtKVkn0siqfPeRe6bQsLHWuROJrRjIYimNNnJqKyznyf63NakaGrn
+Azk5ZJ/sIraDpM67VwCrcf1GXVnfDjtkLgANldX6gsAqA2C//vXWv0acJPBgvW/QbmUbADkQrI0b
+CewoAJ1GZlSHAxYcB+r1L1gJ7773oWfQbDd4HNsBASwD7SobwgLLf3yDgWzn7TDqXtGOAxsM7fBi
+sHqmTcifriP5WfkYrA6P+nlsKnFVYLBqMqZN15X0jWFFrGqXfrPFniIfAmtlhq4Zgh3PglJ3qbEd
+YYBVb6Sfqx53yAbHzBm64qiFYPlAhUtfvyJPWzhgeWdkz9JtdqZjA7TjX4bdy9txoXb8jBmGdmCw
+2rMMJtzemIDBanYbGFRZjMHak2VgbziP7oVgVRg98PSYuwOBZTRjzdvxLPAYPmG1OCywqosMPlRU
+jcFascDgc9m7MVhGj+7tcDeEA9bp8bUGH1uwAoH1tbuDxI4LB7KBsR08WBP2AP6Rb/5QAjtkGg+W
+0SNj0bOUk/hnQoe8EN9GTwRg6Q/sxzydAlh+YzuejfIzoavA+0ZgreLBemmBfgrgJQQrnGdCQ7DO
+Zx8wSIh4sNoMA+EqASyyZ0IjsPYbfNPDBSuKT7EnilgSWAR2ULAoWBQsChYFi4JFwaJgfVvBajcA
+azkFi4JFIxYFi4JFwaJgUbAoWBQsChZN3ilYNGJRsChYFCwKVphg5RCCVUAG1pCS6A6Fc0eSgNUB
+cu4jBKsgumDFE4IVTwZWmT3KYGWC00RgxROCFU8MViEZWLklZGDlVEU3YpXlkkWswgIysKAdUQUr
+s44IrLpMMrCqMkFUwVo4lzBiZf7raxKw6jK7ScECRGChaxGBBSWqYEEhAgsKCVjQDm80wUJCAhYS
+ErCQRA8sLERg4e5tI7Jjd1TBQjuiiMDygegOhfCCZGD1kEUsnzeqEQvZSwSWjxAsX5TB8hGC5SME
+yxdtsC77iOUnAwsKjViXU8QKC6xoRiwKFgWLRiwKFgWLgkXBomBRsChYFCwKFgWL3hVSsChYNGJR
+sChYFCwKFgWLgkXBomBhsGoIwTIoR1IpgmWg4PIwk/dygw80IMUgWOsJwTKsNhNlsIolsPRlkgBW
+pf7HOsIFy6jazIoVRN0r2LHbsNrMJBGsjNcaa3SkcT1fl6jBVd/coCNtFXz5nz0ZNcrrqa7emB8m
+WMVtDbrtHnR1oC9e/nxdMyQ7PJUG16soDku/ao+uWxqaPTwpK2Ycr9WV8TxYxeW6+jUfcTWEo97p
+8dv12z0+cwXfvUeI7Ng9vraWwA4IFliZkaWQaRMUbydk8KHAV+7WL+8t1G9vn66+nvJtFnEBfGGk
+W2RQVhwXSvNDoLN0RbSj0uUhsoNUDOvaCxGmdrxKpirfOma04M/VG+jnLveFpd8Kh7Kd7Gy1IgeI
+ulewo2WG6nrjs0PZwaCa4Y2tja2tjY3wL/g3fo3+j9/gF9LorpQO1Xt+jPaDdnxuo3AJ8bKyNkjr
+lIuxv81AhM81tirsaNWyo43wepHqp37fKeQyLcdb9OT4eSEHazO4XpjqAf1moVZ8uz4jt3TyZpw3
+uh62gyFSzQ8uf/H/m9jxbyIMdQEVChYVChYVChYVKhQsKhQsKhQsKlQoWFQoWFQoWCD0g0V8fvUL
+2SdDbKDwmqqu1xtQQd1SqCNBp/WYrKDkpR5/kEt9BKf5zFscUDTfE/zSq+llXwTdq4hYWwvmIlla
+8o786M6SwmeXbj6ruOjhrYVzl5YdEo41FSycK5z5odnfhJadJagZ6XG7hULLSBm0ZFNXoDgiSi86
+benmdtO/qYGGsGuqeJfOXbi0rJVfUtonOHnp5h6VlxeqvRxlrsokXTa3KjpzHWq6Sd408vKzopel
+M5eWHIpsKExjBIkfd1LEc93wBHxo2JRuiebD9wyxoWOJ4w7hz9QxktinmNp3LYtvxi3HD1si+EFs
+2JY4hf9yyHXhu9ILehcPF0/zmxey4IV7BXfFDVvSjRvKlanzIj5SKB0Y9g7Q8rIpYKUxoZremZYg
+eOuk6JqAlzerzhTACBesTGZkDpTJsKlbeANbHoH43Dc5J3M4w9wiXvP5BCYOHUuzMQO3ocel1jHx
+OVgyYf89ZOJ37vBQ6JP7YDsJjO0hXsN4JlNs+SkcIhi7qAsDj3Sh074cJZ3G3GVaz6H9C7Ahu+Cu
+u86iYwVMkqBOAtNvG3JNCX9kMjww6JD8tEzey2dN857QvTk5sH8HviN00xM2Ji5tMvZfIj7mlbyc
+hrzs589ME8Eg9Z8KrBL+xbqh/V7EcX0iE7fkEPrWt6yTrvmcjZnyYQ+Ol6OYQTU4YsVLEaV/3Aem
+9dyXI5jEzdjxdYttzO9xOLIzTfwQ9Hx/rEsVIz7bvOURW2KNeNqSJv60BHiaecPgKNjQId4PCcyD
+PFjis5EPj7Jh1kqYTOHACPgRv+RlIHj5Qb95YJWIugy1Pci/eo6JEzpz3XAhdEB3DdvcJHr5KcWZ
+6wbzYIQNlvjA9CeY3yKbN6GQJMh7gxkcGE8k2J4Sj50bxUzhwRLzu97/Mq/n4LWlqAnxjsOv7cw+
+4V+HM1sxWElisnnpZ7YXkRXPMbdIucEm4bToC24oUcpON/W3HVKABb66FkeoEiYNKojo2cQMA7yX
+A6dBL79jHliFUjPMNThlec8WJ4FyYiizJNjLiScVSCIwIgfLDxZiPHqHMjI+JyIdusAjzDi/6Enw
+pu2hs3Kw/HJPRrvjDg+OCzj93IjEDySwvH6o2HDbZgVYXYI9h69gtgXgHJG4zaxtWS3XMbJrTxy4
+TeUO/jvAg4XzdeYWrNJ1zIuB9GFi3IsWgNXL2M6jUW8UzFykf3+OeQpqj73slcYI6OUAWH6QRxo3
+QkescyP6ob54b8A1rYF/PXcIJiz+iz/jUwW+oUsfnvdaFrH+zPwk0AG9hw95gWwohN91/EWTR6x7
+bEuE0wLSiawwR95kftKtcpccrBPX9jskHwrBRD4rhNHjrPo08yPWoPNYpUEBiADsTOyuu4CkQm8n
+9LIsYn05ot8HEYGVu68KyrpRcLSDt3+/tj0l3HBCEW6Ot18x7JR0DM+6+GU5Vi/MsfaY1XG/Zv4X
+KNThc6wSrPPihMRtQJFj9T5iG/gBPu2/g08zI3VXNCT0TagcKw0rvPUe20N4UgJ62a/2sgU5Fmpz
+Q/9xFwKdqeVldGYeD8bQuCWE92YqsEQZ181rsjTohHXMfUGXhmAVYMk19a5wKBNiGiU+XtSZn26o
+YobMRark5eK7Qi867R1ggfjguLI56GgBM5JXJwfeSgt3haLGiYcwj6G8bBJYabwu8IZ0IJ4oeoZ5
+POj7EexlnwqMSMCyJyEZYkP5G5q+4BH3jeQlTYjkXlxCPAkfS6rDYEkTHQ+1muYaO1OHo03VEL7l
+PB6sJEFnfGsMwZJ0ieMjwRCmisdfUDgH+MzpuJH47gGAJqGhTHisIDB3lMinEBAsXuEEG8zZ/TIv
+JwW8bBJY4pyfeLeQJ8bTNEFlH/DFq7xcgM+UgxF5jtWybiiDponE75JPpAb75T4erCTh4D4MVi6U
+oTbhZtskuVn4LolzoJkibcJEBx6Jqhg7VCUnIT5xyVnxtHeEWMZLmllgpQkRS5wvTsJgjczLzc0c
+HD9MnPUXcyzv4XuYRMXXV+ZlsyIWdE1mf2bcZiHuSBFLRA7mqwlBXg4Moi3rbmYe8kcOFkzuEq75
+AA23S7AGvq1QqgqZIfD17sHj+ByrCh3cahfAQkc+HYxaNW+x60Zh9G/C6uSIYO0Tb/ZtP+mRcqxN
+trglqtSMPy3XLLACORbvLogUBiuXny5KFO9MA3eFJ0b0ezHotELIo6k5Vu9E5hYxY39ezLFwZ5bF
+I7DUXs5RpP0QjJo+gNUDhjNl6DZHfldYh8E68bNB2xTD077AXeGmBHT3ahpZf2YelC2eFirAQvdT
+TL/zgbvC51CGhXVR3hVuNQss3JDM9io5WLA3+21TgyVMCilvJveZCVYh8si5UWjePYBJICXGEes5
+6OUumZdz5Gm/X5gtjBSsLhif4fmXZPNYfvAhAgvNYz0kW65UgAWet9leNAsrPzjcP64m8G6pGqxe
+xtYjm26YaBO6Ep4mZu9ePygzD6xzV0jzWLChrXKwwLkRaNxTgCVOcp2TzbMhL5sLFpozGMrccoEP
+sqOYpwJN92CwdvZPVHhZAVaXlEhGOBQKcUk+lQzvB6/FlPfv91RgkeVaOVj+XvhlOGkWWTiIB67+
+hHoofNP23XYZWOew+/zq0xabB5Ziih+6Sw4WzhOE5F1U4NJE2+9BsJeHmD6Ptckm8iRfWgGH8awp
+dNddSi+LYHl5MPZHApYAZssjDB58YSOJwlrhzidsaAUCyNYKfXWLb7bZ6gJgecGJwba7zNs+EFjF
+atl5j42ZrJggPTwUeyswQWjMGXEAAAJqSURBVPpef366gV8rPCuddp9pYPGLkry7HrEx4+RgoTUo
+YbohU5zzs/FBLMjLfpPBgtFcXISeKK4Vnt+3OMEWh159qvTy44oJ0ntIJxxUYKUVoNu7nOEMjs5e
+YXcDOmJjmHHC1/F5G9rdkItWv6EKfsXM+3MMs80srsR1d3hnAxWMm9LKg5WTh3TOTGDUM++PMGgM
+9PrA4VH8adiKKa3m6Ye2UUjuwhMvAbBganNLK45YSdjJuXg/hh97+Z4QXjYTLDgY3iXQ/QQT6Mxx
+7wS8PFn08ln+fhI7OedmJo5wUjD0fixmmLRss244nhey2ccJW3jwfqwEfr/OyCU9wv21CNalEcwg
+8wZDaacQY59yiE/NmcBWITznt5Wxi2DBACJsQhH3Y/GnmSi968SGkLu8aD9WjrSM0h9veAjsx7Lz
+82z8Nq74wGlmgZXGzBW/AZsYKbkS92PF4xiBs4qWxUPkXvaFBCMcsEpy87Aod1ruLJlbwG/HlO0w
+hMcKln4oZDdNuXmBT+dONm8XKcqYdhbCljdLq2sFvMoFS/mOBHU5c6UAsi53ssiR+jTzBDaUt7Ss
+SbwJzSmTdH8+93GYX1TlCE4uUygDvZyn9nKUwSrJqZLePZO7tNsrtHUYdTDuTG9IL/tkYBAvORnu
+eff6Zb0qSo/OcADM3Pfu1VHWq3fAr2djlNlXudQXdCTYjV4L6uCodfEG97RwSL7nXa2zPwKwqFCJ
+mlCwqFCwqFCwqFCwqFChYFGhYFGhYFGhQsGiQsGiQsGiQoWCRYWCRYWCRYUKBYsKBYsKBYsKFQoW
+FQoWFQoWFSoULCqXq/w/gbudjI6bMwYAAAAASUVORK5CYII=
diff --git a/Documentation/DocBook/media/constraints.png.b64 b/Documentation/DocBook/media/constraints.png.b64
new file mode 100644
index 00000000..125b4a94
--- /dev/null
+++ b/Documentation/DocBook/media/constraints.png.b64
@@ -0,0 +1,59 @@
+iVBORw0KGgoAAAANSUhEUgAAAlQAAAFYCAYAAACVsmLPAAAAAXNSR0IArs4c6QAAAAZiS0dEAP8A
+/wD/oL2nkwAAAAlwSFlzAAAOxAAADsQBlSsOGwAAAAd0SU1FB9sLCBIAKVtZsMAAAAxxSURBVHja
+7d3ZbqvIAkDRLsv//8v0QytXvpYZap7Wko56OAnE2AXbBSbhOI7jHwAAkr1sAgAAQQUAIKgAAAQV
+AICgAgBAUAEACCoAAEEFACCoAAAQVAAAzb2jvyMEWw0AmFvh37xnhgoAQFABAPT1zvruwtNlAADV
+VLxsyQwVAICgAgAQVAAAggoAQFABACCoYEohuFkugKACsmLq178DIKiAyJgSVQCCCigQU6IKQFAB
+BWJKVAEIKqBgKIkqAEEFFAgkUQUgqIACYSSqAAQViKkwxjIAEFSwbUyJKgBBBWJq8GUCIKhgm5gS
+VQCCCsSUqAIQVMBYoSOqAAQVLOk41lwXAIIKhoqqJyFUYhkACCpYMqpiQqjEMgAQVLBUVKWEUIll
+ACCoYImoygmhEssAQFDBElHVexkACCoAAEEFACCoAAAQVAAAggoAQFABAAgqAAAEFQCAoAIAEFQA
+AIIKAABBBQAgqAAABBUAgKACAOA/b5sAGjsO2wBgMWaoAAAEFQCAoAIAEFQAADtzUXohIQQbAYDi
+Dh9kmYIZKgAAQQUAIKgAAAQVAICgAgAgmU/5VeSTGQDE8InxeZmhAgAQVAAAggoAQFABAAgqAAAE
+FQCAoAIAEFQAAHtyY0/o4O7efe4JCzAXM1QAAIIKAEBQAQAIKgAAQQUAgKACABBUAACCCgBAUAEA
+IKgAAAQVAICgAgAQVAAACCoAAEEFACCoAAAEFVBICGMsAwBBBVPHVE4QlVgGAIIKpo6ps/9utQwA
+BBUsEVMpQVRiGQAIKlgqpmKCqMQyABBUsGRMzbouAAQVNHMca64LAEEFy0WVmAIQVCCqxBSAoAL6
+hI+YAhBUIKrEFICgAvqEkJgCEFQgqo4+3wuAoILto0pMAQgqICOQxBSAoAIyQklMAQgqICOYxBSA
+oAIyokpMAQgqICOqxBTAvN42AYwTVQDMyQwVAICgAgAQVAAAggoAQFABAJDMp/y4FIJtwJx8ehJo
+yQwVAICgAgDoyyk/HnMKhdE5RQ30YoYKAEBQAQAIKgAAQQUAIKgAABBUAACCCgBAUAEACCoAAAQV
+AICgAgAQVAAAggoAAEEFACCoAAAEFQCAoAIAQFABAAgqAABBBQAgqAAAEFQAAIIKAEBQAQAIKgAA
+BBUAgKACABBUAACCCgAAQQUAIKgAAAQVAICgAgBAUAEACCoAAEEFACCoAAAQVAAAggoAQFABAAgq
+AACGCKoQPAs2JQAIquwCUAI2JQAIqowCOPtvbEoAEFQRBaAEbEoAEFQFCkAJ2JQAIKgKFIASsClh
+szEKrDGoXkNuiOPwwim4iezYoc9+39iDfQbVq+mGEFOiCjZ7E23swR6D6tV8Q4gpUQWb7PeNPdhn
+UL26bAgxJapgk/2+sQd7DKr3EDE1y96mUPT1fqgh6Ffosbsz9mDdQfXquiEY/rUKlBtLYgoqDJZB
+Dmjlg8qRWlSBMSSmYLOoKhtUjtCiCowdMQUbRtXLswUgpkBU5XkXf9CmPJZ9nQJrft6Gife9XmC/
+t0mHg9tr3FcJYgrmjilgn8Fa55SfI7WYAvtnYKNBW+8+VLGn/zY6wtd4qDY1iCngx+BtdNCre1G6
+W3gPt7MXUwAwW1CJKjEFCzB2wODtH1SiSkyB/TKw+KB9DfnARJWYAvtnYKLB+m7+AJ+UgL2WTQmT
+jz1jEJVf0ASD7jXck2/vY1PCQscwE+6wfkz1CaqrB6wAbEoQVcBkMdUvqH49cAVgU4KoAiaMqb5B
+9bkBFIBNCaIKmDSm+geVArApYaOxZ4zCuoPq5VkDqL//F1Ow9qASVACV9/9iCtYfVIIKoOL+X0zB
+HoNKUAFU2v+LKdhnUAkqgAZvqoG1B5WgAgAQVAAAggoAQFABAAgqAAAEFQCAoAIAEFQAAIIKAABB
+BQAgqAAABBUAgKACAEBQAQAIKgAAQQUAIKgAABBUAACCCgBAUAEACCoAAAQVAICgAgAY3NsmIEYI
+//3zONK/7u/v/nx+zdPl/1rO0++LWd6vZZ59Xe7jSfnZSq3z6jnJ2ValX09PHj9AD2aoiPJ34Lo6
+wJWKiJQD7N2BN/WAzbNtZTsCuzJDRZeD8XHkH3zPZo5CSJudeTKbdrX+lkE7QkzFbq8VHj/AGTNU
+dDkY1ziw1jjY7nAA/wzKqxnIu5gSPICggoTIuDroXh1YRz3ohuCUlcgESOOUH81iZdR1fJ9+zL1Q
+use1Y6nrvLsearR46rHNAQQVw6l14HtyOurJz5USVqs9LynXt8V+ShBAUMHHQfdzFuMsQGqHSW5M
+PQmrVtdsjRCkOwY5gKBiGne3Okg5WJaMqbuw2uX5+P6aX4H8/f922F4AgorlgyD3hp47z3ycPfZf
+p/FSb00BIKjg4kD8/cm4mFNjKfd/OpsJyb2GJ+V+UzEXSK9wAfuvqGr9s7ooHRiV2yYgDCe8xUOp
+gHny2GNjVdwAOzJDRbUYSfnep8srfdCOWV6tr225ztzt3PpxiTRgdGaoAAAEFQBAX075sbS7C6dH
+OJU0w8/ocQEIKjY2w0F71bAQTMBOnPIDABBUAAB9OeXHY36tCAD8ZoYKAEBQAQD05ZQfl3xSCwDu
+maECABBUAACCCgBAUAEACCqgiRDczwtAUAFZMfXr3wEQVEBkTIkqAEEFFIgpUQUgqIACMSWqAAQV
+UDCURBWAoAIKBJKoAhBUQIEwElUAggrEVBhjGQAIKtg2pkQVgKACMTX4MgEQVLBNTIkqAEEFYkpU
+AQgqYKzQEVUAggqWdBxrrgsAQQVDRdWTECqxDAAEFSwZVTEhVGIZAAgqWCqqUkKoxDIAEFSwRFTl
+hFCJZQAgqGCJqOq9DAAEFQCAoAIAEFQAAAgqAABBBQAwibdNAECqcPKLJo8fH1cNN7+U8up7jpOP
+v6as//PvPr+/xPpTlsEazFABUDSmnsRTie/pvX74ZIYKgKz4+J55+fu7EMLPWZmU2auY9YsjejBD
+BUDRmDk7pdZq/Vf/P2bZT7/2OI7/rU/ICSoAiHIVLS2uFyq5Dtc3kcspPwCairmQvHUghhBOT1U+
+eQx/fyfQBBUALBNrtcPmc/l/QYagAoDqYi9ib/2zPZ2l+hVw7Ms1VAAkKXXbgpIXkH9eIF7r8T15
+bEJLUAHA4wD6FQ5PPoVXc/0ll3/3db/+sCen/ABIio7PU3U5YfIdY0++78n6RzPqxfiUYYYKqh94
+rv/AzFGV8nelouLue3JC5e5XzTx57E777SUcsa+4zxeIo8HlOw/vOgBwLBlqA1drGDNUAACCCgBA
+UAEATM2n/CpyQSIA7MEMFQCAoAIAEFQAAIIKAGBnLkovxI3XAGBfZqgAAAQVAEBfTvlBbXf3I3O6
+GGB6ZqgAAAQVAICgAgAQVAAAggoAAEEFACCoAAAEFQCAoAIAQFABAAgqAABBBQAgqAAAEFQAAIIK
+AEBQAQAIKiBFCGMsAwBBBVPHVE4QlVgGAM29bQIoGFOf/30c7ZcBrV/zd6/Rq6/7fs1/fs3T5Z+9
+AckZO2dvaL6XeffGJ/XxpPxspdZ59ZzkbKve278BM1RQOqaeDvbSy4CW/g5WV6/RUhHRcuwYc2W2
+VY3tP/hzY4YKar5bfLIDeLIMM1WsOnaOI/9AeTZzETt2YmbTrtbfMmhH2PfFbq/Syxxk/2iGCmrF
+1Kzrgplez78OpjUOsDu8qfkMyqsZyLvwSdleNZYpqGASLQe3GSpGHgNXB92r1+6or+sQvInptV+a
+eF/nlB/kDv7aO14xxUpahErqOr7Hc+yF9y3Hbul13l27NPJ+aJBTgYIKRo4qMcXK46b2wTVlHb9m
+3VpcXD/i85Kyb4v9lGCvZQoq2CiqxBQzvfY/ZzHOAqR2mOTG1JOwanXN1ghBunucR3INFYw4qMUU
+K/sLsO9rlXKuXSoZU99jcfXxmPpp5LP7f5W+B9Ukz4GggtGiSkxBn5ja/UL0v3D5/nO1jyq1zWos
+szGn/KDGTinnoliY9TV/FzZnr++U+z+dfcIw93qblPtNxVwUvcIF7N/7uZJRlbLMQS5KN0MFtQ4w
+YgrWGberjs+Y21vExmqN/eDAz0M4jsifrtZ5alh5ZyWmAMbaJxfe75qhgl7veMUUwDIEFfSMKjEF
+sAQXpUOrqJrk5nSwpLvT7yOMxxl+Ro9LUMFQUSWmoP348zN6XIIK7FgAWDWo/DZuAAAXpQMACCoA
+gM7iT/m5BgQA4P+YoQIAEFQAAIIKAEBQAQAIKgAABBUAgKACABBUAAB7+hfHbDX87cMFJQAAAABJ
+RU5ErkJggg==
diff --git a/Documentation/DocBook/media/crop.gif.b64 b/Documentation/DocBook/media/crop.gif.b64
new file mode 100644
index 00000000..11d936ae
--- /dev/null
+++ b/Documentation/DocBook/media/crop.gif.b64
@@ -0,0 +1,105 @@
+R0lGODlhuQJGAeMAAAAAAH9/fwCvAP8AANEA0dEAAK8Ar////wCOAAAA0QAA////////////////
+/////ywAAAAAuQJGAQAE/vDISau9OOvNu/9gKI5kaZ5oqq5s675wLM90bd94ru987//AoHBILBqP
+yKRyyWw6n9CodEqtWq/YrHbL7Xq/4LB4TC6bz+i0es1uu9/wuHxOr9vv+Lx+z+/7/4CBgoOEhYaH
+iImKi4yNjo+QkZKTlJWWl5iZmpucnZ6foKGio6SlpqeoqaqrrK2ur7CxsrO0tba3uLm6gQC9vr/A
+wcLDxMXGx8jJysvMzc7P0NHS09TV1tfYxbth2d3e3+DRAePk5ebn6Onl4ezt7u3q8fLqANtg7/j5
++s/z/f4B+wIKHAjsn8F09ex5IciwobuDEM1Bi0ixosWLGDNqrJhQIZdk/htDihxJsiTJiSZTqlzJ
+MmNHj1q+tRznsKbNmzhzDoz3EiYWmTN7+vQJgOfQmN5mAjzKtCg9pj+TBoU61ClCqlaAthSKVZdV
+dFy7NtHKMqxYW1/PmT2bhOzKtWxlpZUYF4pblXDrvpq7Tq+Tu+UGCB5MuLDhw4gTK17MuLHjx5Aj
+S55MubLly5gza95MmVxev0EAkxsg8jNoVXNJ0zy9RPQ41RtNsz6V2vPstlLTwdYo+zap2qt9G3Ed
+YLdL4bGAL0VOhLhxjL2Zf1IeXboM56Wtt6KuPXRudM8vVu+eiTt5H9hDjj9vyfyIXrTW80gfO4OC
++/jz69/Pv7///wAG/ijggAQWaOCBCCao4IIMNujggRe4J4IwBxBg4YUYZqjhhhx26OGHIIYo4ogk
+loihMBbi1k084VlklgLsWQKjBRJqgIwEBJRyY4UqZsNidhjMGOMkQlLgnjERwkdBjuVpk2QFTB5B
+H2/2DUlJkRNYhWQKUTKyJQpdFjHlcUFaSaQxo9nGQph/fCkDm0OMCV2VZh7iZpbnwCYfBnDKcecO
+fXq3ojotckRnnXr8SQGWEtQIphuKEhEoEHKKdygHCUiQ6QEJdDrEphWA2oGo3UXaAaMHOHrCpFmY
+2gSr6H2XJ5AXoHqBp5xyuimpPfCa6we+6uWqCaiqagKsTAxrBbLz/slqTqEUvWgBqLviSqqvnXpq
+rbbZTpDtt9ziSsG3unKraabkltutWMq+UOyswa3A7A/tfjGvDpW6eKm3v+a667i38vvvuQLzW7Cm
+AJ878L/W9ouuR/Xi8O6zasorRMRo3JtDvoaWOe2v4IIc7LUIE4zwtd1Sm7C6KZ8MLsmzYBzExIFV
+rILGJsgcB843cBztvgqHWnKwup5s8rroVivwwEc3DHLR/jKcis5K0JxmvDezQLUePNvgc0TSBix0
+1OuG6nS56nob7ssqp132wuIi7cnWU1j9ms1chkD3IF3X8DVEYe9AtNi37M2F3cXh/WgFhjPSNw1/
+HxS4CS97MPjH/ts5uQfieqbQuCWPzxC5QZPncPnYoXz+BueKY+Bm6J3AHsPo/5TOmup5sB5vxLJv
+0vsLtPtjO1W4D0Kz6r9nknwLwfczvFeam6IAmndjnfcsy2vtbM3qAT2KkhkULwj4SRITIbzLWYx9
+j9j82L3HvyljivzeG1tC9qCzf4379cEPigACCAYAB0jAAhrwgAhMoAIXyMAGOvCBEIygAVMVDBLo
+Ln1ZWx8SmjeP521CAEYiXypAGML1XHBPF8BfJVToue1drX+1GgUJZTHDFJywBSycRA5PwEF5eFAT
+NYRFEE9wwzXRYoc5c2H1YGgBW32QFkMk1vkoZr3FyQKJJeih/lH894kotsKLFpwi9zB4vSvqzxr8
+oxIXPQHGVbRRBEVUnxk3qMTEvS+GonjjBBCwxwMg4I+d0CMI4pjBOUqpjtACm/c4IUhASuCPfPQj
+I1lAyDLGAosk0OJT1hhIC0RSkpDsoyg9GUpAhtKPp6QAJD9pB0F+oJJWvOQZq5FGMuExFFHkYyR1
+OUpWqrKPvHykJIXZyzy40gOwXNURZ0mNWs6Jk5P0JChXKUxHXsCXwQTlKIe5h2OeSowvRKEFMOkI
+ck4IkbRqogyvaU1uZpOd1URlNXepSnriwZscSOaxlknHQekmnRVwIhAxgM09rtKXBrXnKalJzFTe
+AZ8b0Of9/vh5SH+CB6CLWicPEAoIiGpAoiQwp+OYOQ1nWgqaT0TBQTl6TUN4tH7oEyeUKDocdN5R
+nXnsAUv98FJO2i+kNBWTTZkYUI3SkJLgXKJMlxTU5gxVjbf8HxSRSqOY4rCpcXqqLXGKy6muAKQj
+EOkixPoBTV4FpQOdRU+jiicqkjGWsCCrB8wKlkWm9KhfTaod36pMDVbUR4TC6AQEmom1spGqjLOq
+Ef1aU4uiD6pclapaEWskxcpRlv0E7D9vWtScTjavVXXrUicgV0SUlgN0VYtd04pXFYBVBKc1RGxt
+pNVnsvWwn3WtXju3WEM2VrMX5WxGPdtaG+62dftkrFAd/utWyHa2q7k1bmjHOFocYfVitT3pbTsZ
+XRS8NgSzJUR4XZddfaG1sF7V7XTDeVXlOpW5Y3TucKFbXO8et4p99e1ygfvYrT5XsvUl4n35mlz9
+vpe/zfXvfAEcC8P+t63Uba+BswrfF8p3sEZtMGUhzN7eYvav7QuscDFMXA2DNrGilfCHfxvizRJ1
+wV1Mr3RRHGEPx5Wk0jCpebcbzQBLcb1KVfGNM9vi4L6YxPQ1sXpp3OHLDhnE+xPxkSVAWEw4uMcz
+rmyKbfyK8ZYPwfFVMJIZLMQNN8qyhVzxfovcX9tGNsbdFTCQ91pdHrmXwmC2sJipnOEyn1jLNXZy
+l3Es/g4pX5jPJfbzkgHd5DQ/mcVRdvGhD1DlS1z5rlnmcJC57Aov06i8HeMxphWd6TNvWdCdJjRK
+JL1nSvf5FZdGNJM3jepWeJpxoP7Zea0sY/vOmbe1ZsWtS5jnJU660paINXr/rGk6C3nQRI60kY/9
+alco+7sgGLYftN2oXCty15butZxn7WxO21rV/DB0q5FdCWXzmtmmDrSjoQ1lNKrbzQ/GrY9LgO0P
+cJsP/04tXcCdbHH/mNzAnneqo21vVuMbxvpWcqlThWZLPnrN0m6zdt8ccVL7GuHIneidsVthY6+7
+2l80M8VPrfBzM5yW9954vrm77zD+OuRAHbmgir1X/monGtYq/2lYr7tzNif44WOGc81H0G8P/HsP
+Afc24Fa77KXDccB1fjrX0O0MHYea4zSX+McZTeuWC5vrzfC6rkXNWrGPm+zlDvYqol7ynp/859YO
+esXhSm9IN3zad0+yx9/e7IRbvO8Y/7vGdwx2LA/+4HA3PN8XXm+YO1zmEA/74/mN9WdT3u+WBzzS
+ZU1moMN75fI+/OcTH/rFf33mjjf9oguP86HrnFJSlxzV3231EDS9A1rPA915nsipE7zdBuf8zfFb
+YDUf2OhhHr2r8Z7y0wsdtkTHfd2Lr/vjU8Ld4bb+3vPrfDxDX8/SZ/f3k29zkDNf5BMmOfEFS3ql
+/rsd8rR/f87jX/SMHx3zSddxsjdx1wde2UcvuUc6uxd+vTdInWduZ/dyzRRzjAd7ozaAY5d/BAZ/
+5Sd/52dy6YdyrHBtDyh3qjB8H2h3IUh9I6h3LKd6Lld5E3h5FZh5sZd34veCkxeDoDeDogeA9SeA
+ODh78dZoMBiBMlhSFPh6NniBQ0iA49d8F/d8/hd9QDh9goeBhFeEZXeEc4d2zKB238Z2VXd/yud+
+G7h/Hdh/ivd/NRiAmqeF+MeFcWd2XyiBSkiDTAiHN1h9RIh6RriDSNiDefiDbxiEcfiEGUiHkkd+
+U2h+VYh+V6h+kyBBlniJmJiJmriJluiCqSeI/neYhDm2hGvXeJzgCzEjQkxXgnZ4gmC4DGJofGS4
+CcAAC7XYfpFXe9h3ewi4ffSHhfGjiqvwC2eYi/pne/ynffM3YoiYOqhoi894dcuXhsi4hsqYgtyn
+gN5XHt1mi93oe6zoha6Ih6Ooh6VogTpSZ+3RG7/HAcGHBygYiSA4idCYisgUjqA4jqJYaOY4hqY4
+NepYCcI4cwWYbQcYK77IjMBYj3KxFu24Ae94B/HYhlZ4iAuZHAFJCQP5kBoQkXYwka3nhnvYjAyJ
+kWBXkP52kD0gcH2xjQBpj3CIkk6nks2SkFN2kWiRkZOgJByZAR5ZByDpg653jk1YCwM5jADQ/pN8
+QpM7wJIhR4l7oZOSoIoyCXxMiS8JWDsLaJRS+QgwQj5V6Y5XuTFZKTxbmZOvICRKEpYQOZY44JTv
+B5Xx0ZWKgCW+EIUc+IgeKI8qSI9YcZSiUCxp0YhSiHhUSJGSaJFyWTh0WQjv0guB6IiGCYmIOY+K
+GReA2QnHUxSEmZeTuZeV2ZeXWReZqQl2A5nHuIvJ2IvLeJOLuQ2leQmcA5lZ55Y9U5bOc5ZHEZtX
+cl+8mZK8iJCt6XO305h6cEK/KZbBuZK42UG6STzGeQdFlJw+aZte05w+9Jx/GZ10QEjUOU7W6TfY
+uUWzKCzcGQew9J2kFZ6QM56bVJ5+oZ6E/qBP6vmTdBCUhTiU/oiOtyGfgQBS1Gmfc4Cf5WiII4mT
+0uGffvBavymgckCg/GigRMmH1qGgm4OP5GWVy1mTwxl4Q2KheNB0memgO/OKyhCL3QefzAGiddCO
+R0micAChqyah+1mU58GicsCRwgijbyCj6daPsviPMYKjq4OhZdUTPOoGPtp1pFijFGomRMoGSvmN
+draawomNv/iaCXqeh2Ok51Sl1siaWKqQWlqhXJoFU4pr7Ck67nlWKgqlZ2oFaQolUZo/5Bih+hmk
+/IkoIfQHc8pUFKSXbBiSFXmgZcoedQoGf7qeWRKngrCkadekemqjfPokx+mlakilqQCp/mEoqSkq
+pJWaoSGKqdXYp5tqoiDhqdr4pqFqqi1KqqppqabAqbCoqlrpkq3aqK86jbW5AYlqWqiKDCi6qqCa
+q7Q1B4tqXR3wq4VAqydqq2aJq8bqqm6QrGCKWo4KlMF6DMN6q6w6rbIqpbBqgHqTrQ+6rdQDpJ+6
+p+C6rObqA9baNcy6behaDN0ard/arqKaBvGaRJzgrKmqrsTKrvo6V++aA/3KQwebBgArrNCam9Ja
+sPtKBgmLAvMqkfVKDPcKsfkqscdKseNqkCtwsfeZscOwsc4ZsR4bPgsbAxU7si0bBg3LrQ+bsh27
+sr4asy3wstojCTObrjQ6qU+Ks/7q/gU8yzw6uwU/a681m50qS7Q52wVH6wIkuwZLq7FNS57FCrVZ
+lLTFqIG92p4jdaczmqfrSqlcq7BoGrLAeZ2KcLUnm7XvubVpW7RVMLVsCqxk+6NBe7ZDW7cqULUu
+y7Yz2ZRe+wRwKwwo67Q3C7jlWjeEq6HNIl4mq7hy66Z067gWe7gwpYOSiZWPWrnBsLhaS7CaG7ic
+e5J4manMCQiJO7qXW1dPe7pfygR4O7l98LoFEbuqNbu0W7tJcLutC3Wiu7sC662Z+7swG7yRq5w1
+tXV7y6THi6/Jq7zLawTCi3vwWLy/QLpza7rWS7U6m73e8ZHce5e8O3CNG76bOwTk/otdJRu9kTq9
+HFu97Iu0M9O8bfkEgvsq54ua9Guz9nu/+Auv+tuRUtC/SqC73Zu+Lbm+BIy6PfC+h6Sk/8sXiWmo
+EQy/OkDBdMQGDIy+Acy4A7zBwHOeHowbahDCANy3A4u2JnybN5DCSqDAQcDCGGyZGhzDTlUDNNwa
+qQuB18iX2Yi84MvDbisDP5wsQSyOV0rEWYrEFQwDSzwWTVyYlEmoGTyhJCnFCOguB1yd3HDFpXqY
+WqzDXIygXly+nhiZWNwFNmwDOOy9mHvEa0yWOfiJn/sFcTwDc+zAT3nHQAyFnvvG90DGzkuIBWq2
+L/y3gqy9cwiIXZiPfIzIYryP/mUrkml8qI8snl/LiLpIrmrQx0IsplBMpp38F+NRxUhBBX88wqUL
+w6nMxq8Uxkv5BqS8x4MqlJrspF08y897j7zqeWuQy6ybxbxcqJsMzFEQm6xcyU7wyi5sxLLMzFkV
+UbacQnZgzG2ryHjay0L7y9b8wT61umXsJ6krzYxMzY48zlNMkOYcq9t8uOoMzn4rzu6swpnHlgi8
+B9x8y5jMt+tMvXaczz2MiPx8yf4cs/WszL6sxgatyjiZ0ADdB//MqAEtvdNM0NUc0eSMhmHbJu/a
+0Fv80Jzs0T8wPT1B0do8CNxM0mhs0ih9BTMCPiwNnoWQyzAtmjs801RQJPBx/tMzZSdcutNFzNHt
+7NNHwCgtPMm6zAvcadRRrNSQ+2lf0ZnHnNPGKdWoTNVSMDGoidXnTNQ0wNWu6dVfDU69INbyvAgX
+a9bEidZOgDioGdKOwKxw7aFybbvHFY2tmAiJmtcruNdNgJyl7NbVJdh+Sdh8Pcw4yiwGMAGRbQCU
+jQGRnQKXvQWPfcF0LLsQzNg+XIIgiiyVLQGUfdmZTQGpbQKr3cpPbcqhedT1W9CgjbDhqKBsktmT
+XdoHkNqtXdqnLdm7fdqVTdy7PcaGPMSxPdW1zbzD/GnHPNmm3duSXd0XIN3TTd3ajdoVwN1iIJ+K
+PZrNjQQS9Z1wIt3GPd28/m0B2L3d2e3dxJ3dY2DenA3IcTnezg3SUdvNwu3b1d3aqt3b8P3e6m3d
+AH7IIpvR87vRs93R+D3D48qbfbLaup3e7G3avD3g1G3c7W0GEl7fsPy9Dv7gof3c7prIolCa4d3T
+JO6+kQuYssPhgL0WK77MLU4ED7mWQ40KOg6oCt6pIV7HI37jg2vi50Q+SVoGxIjR3pzJDh3OEE3k
+tm3kR94RSa7k0VjjMi3l+Uvl5fqMV04GFaTlUH7SXL6FklyH/hrmZ+ALZH7PUX7mJa7fa2Iidn7n
+eJ7ner7nJgLiDC7AtC3neezG9wuXG2jmgr6KXh7Bhv5DiT4D1qqvja6d/o/+h0K9spPuu5UujXTO
+w5n+2ZsOjotOwJ9ewqGOi2ArxaUe6Keu6J0ew6s+5K3u6sZIjdYb60k962h+6R6L6/is64uY5myt
+vL4e58Ae7LwuscWO6Me+XclesMve7EqczUQb7dJOxdSOs9Z+7S4Q6e267dzexk5N6m3q2aYe7uVc
+yIVe7r0L6ugek/FM7OyuvudurAUgAfd+AAWw7z+Q7yfg79806utuk3F9uvyu7/qe7wCvAwtPAg3/
+UdmO6fP+wPVuJf5+7/uu8BXw8BmP8QrP7x0/AR0/8gl/8CKf8fhu8hpf8h4P8iHfuXpM7gAw8wBQ
+8zZ/8zif8zrf2e1e/vEWj/AIv/L4fgEXD/QXX/RFL/JAv/RLr/JDb/Qpr/QmD/ECz746f/VYj/U8
+T++sjigYz/Jfn/AYsPBC7/Rkj/JJ//Ri//Qr//FKz/JU/+omnPV0X/dbT/FdXyco//ZCbwEHH/Z/
+//drb/Z9H/htz/Ypr/Fp7+zx/rt1//hXf/eB7LhkP/Qk7/eCn/hwr/kjf/lBv/d7v/mKj/ahn+4x
+P/CQn/o5zNM2jtIPnwGvvwPeDq6qX/uSf99I3PkeEPtE7+JVH761r/q3f+g+zft+7/tyv8HBn/rD
+7+jvLurJz+jL//jNT+nPb/qEbvXTb/f2fegP8v3gH/7iP/7kX/7m/n/+6D/707r93K8bnPH+8B//
+8j//9F//9n//+E//oez47J/1SmHJEHDkpNVenPXm3X8wFEeyNM8RCFa2BVA4lme6tm8g13e+9/lW
+UDgkFgOvW1K5ZDadT6hSVURGrVdsdvnjdntGcHhY1ZbNZ3Ra3ZkSyWt4XF7z1rtivNi+5/f9f8BA
+wUHCQsNDxETFHaO3uUfISDa7vErLS8xMzU3OTr1Az1DRUdJS0yBHSdXVyL3TV9hY2dmjRdtb3NxB
+2iNW3985XeFh4mLjY+Rk5WUeYOdn6Gjpaepq62vsbO1t7m7vb/Bw8XHycvNz9HT1dfZ293f4ePl5
++nr7e/x8/X3+G37/f4ABBQ4kWNDgQYQJFS5k2NDhQ4gRJdKLAAA7
diff --git a/Documentation/DocBook/media/dvb/audio.xml b/Documentation/DocBook/media/dvb/audio.xml
new file mode 100644
index 00000000..a7ea56c7
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/audio.xml
@@ -0,0 +1,1314 @@
+<title>DVB Audio Device</title>
+<para>The DVB audio device controls the MPEG2 audio decoder of the DVB hardware. It
+can be accessed through <emphasis role="tt">/dev/dvb/adapter0/audio0</emphasis>. Data types and and
+ioctl definitions can be accessed by including <emphasis role="tt">linux/dvb/audio.h</emphasis> in your
+application.
+</para>
+<para>Please note that some DVB cards don&#8217;t have their own MPEG decoder, which results in
+the omission of the audio and video device.
+</para>
+<para>
+These ioctls were also used by V4L2 to control MPEG decoders implemented in V4L2. The use
+of these ioctls for that purpose has been made obsolete and proper V4L2 ioctls or controls
+have been created to replace that functionality.</para>
+
+<section id="audio_data_types">
+<title>Audio Data Types</title>
+<para>This section describes the structures, data types and defines used when talking to the
+audio device.
+</para>
+
+<section id="audio-stream-source-t">
+<title>audio_stream_source_t</title>
+<para>The audio stream source is set through the AUDIO_SELECT_SOURCE call and can take
+the following values, depending on whether we are replaying from an internal (demux) or
+external (user write) source.
+</para>
+<programlisting>
+typedef enum {
+ AUDIO_SOURCE_DEMUX,
+ AUDIO_SOURCE_MEMORY
+} audio_stream_source_t;
+</programlisting>
+<para>AUDIO_SOURCE_DEMUX selects the demultiplexer (fed either by the frontend or the
+DVR device) as the source of the video stream. If AUDIO_SOURCE_MEMORY
+is selected the stream comes from the application through the <emphasis role="tt">write()</emphasis> system
+call.
+</para>
+
+</section>
+<section id="audio-play-state-t">
+<title>audio_play_state_t</title>
+<para>The following values can be returned by the AUDIO_GET_STATUS call representing the
+state of audio playback.
+</para>
+<programlisting>
+typedef enum {
+ AUDIO_STOPPED,
+ AUDIO_PLAYING,
+ AUDIO_PAUSED
+} audio_play_state_t;
+</programlisting>
+
+</section>
+<section id="audio-channel-select-t">
+<title>audio_channel_select_t</title>
+<para>The audio channel selected via AUDIO_CHANNEL_SELECT is determined by the
+following values.
+</para>
+<programlisting>
+typedef enum {
+ AUDIO_STEREO,
+ AUDIO_MONO_LEFT,
+ AUDIO_MONO_RIGHT,
+ AUDIO_MONO,
+ AUDIO_STEREO_SWAPPED
+} audio_channel_select_t;
+</programlisting>
+
+</section>
+<section id="audio-status">
+<title>struct audio_status</title>
+<para>The AUDIO_GET_STATUS call returns the following structure informing about various
+states of the playback operation.
+</para>
+<programlisting>
+typedef struct audio_status {
+ boolean AV_sync_state;
+ boolean mute_state;
+ audio_play_state_t play_state;
+ audio_stream_source_t stream_source;
+ audio_channel_select_t channel_select;
+ boolean bypass_mode;
+ audio_mixer_t mixer_state;
+} audio_status_t;
+</programlisting>
+
+</section>
+<section id="audio-mixer">
+<title>struct audio_mixer</title>
+<para>The following structure is used by the AUDIO_SET_MIXER call to set the audio
+volume.
+</para>
+<programlisting>
+typedef struct audio_mixer {
+ unsigned int volume_left;
+ unsigned int volume_right;
+} audio_mixer_t;
+</programlisting>
+
+</section>
+<section id="audio_encodings">
+<title>audio encodings</title>
+<para>A call to AUDIO_GET_CAPABILITIES returns an unsigned integer with the following
+bits set according to the hardwares capabilities.
+</para>
+<programlisting>
+ #define AUDIO_CAP_DTS 1
+ #define AUDIO_CAP_LPCM 2
+ #define AUDIO_CAP_MP1 4
+ #define AUDIO_CAP_MP2 8
+ #define AUDIO_CAP_MP3 16
+ #define AUDIO_CAP_AAC 32
+ #define AUDIO_CAP_OGG 64
+ #define AUDIO_CAP_SDDS 128
+ #define AUDIO_CAP_AC3 256
+</programlisting>
+
+</section>
+<section id="audio-karaoke">
+<title>struct audio_karaoke</title>
+<para>The ioctl AUDIO_SET_KARAOKE uses the following format:
+</para>
+<programlisting>
+typedef
+struct audio_karaoke {
+ int vocal1;
+ int vocal2;
+ int melody;
+} audio_karaoke_t;
+</programlisting>
+<para>If Vocal1 or Vocal2 are non-zero, they get mixed into left and right t at 70% each. If both,
+Vocal1 and Vocal2 are non-zero, Vocal1 gets mixed into the left channel and Vocal2 into the
+right channel at 100% each. Ff Melody is non-zero, the melody channel gets mixed into left
+and right.
+</para>
+
+</section>
+<section id="audio-attributes-t">
+<title>audio attributes</title>
+<para>The following attributes can be set by a call to AUDIO_SET_ATTRIBUTES:
+</para>
+<programlisting>
+ typedef uint16_t audio_attributes_t;
+ /&#x22C6; bits: descr. &#x22C6;/
+ /&#x22C6; 15-13 audio coding mode (0=ac3, 2=mpeg1, 3=mpeg2ext, 4=LPCM, 6=DTS, &#x22C6;/
+ /&#x22C6; 12 multichannel extension &#x22C6;/
+ /&#x22C6; 11-10 audio type (0=not spec, 1=language included) &#x22C6;/
+ /&#x22C6; 9- 8 audio application mode (0=not spec, 1=karaoke, 2=surround) &#x22C6;/
+ /&#x22C6; 7- 6 Quantization / DRC (mpeg audio: 1=DRC exists)(lpcm: 0=16bit, &#x22C6;/
+ /&#x22C6; 5- 4 Sample frequency fs (0=48kHz, 1=96kHz) &#x22C6;/
+ /&#x22C6; 2- 0 number of audio channels (n+1 channels) &#x22C6;/
+</programlisting>
+ </section></section>
+<section id="audio_function_calls">
+<title>Audio Function Calls</title>
+
+
+<section id="audio_fopen">
+<title>open()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call opens a named audio device (e.g. /dev/dvb/adapter0/audio0)
+ for subsequent use. When an open() call has succeeded, the device will be ready
+ for use. The significance of blocking or non-blocking mode is described in the
+ documentation for functions where there is a difference. It does not affect the
+ semantics of the open() call itself. A device opened in blocking mode can later
+ be put into non-blocking mode (and vice versa) using the F_SETFL command
+ of the fcntl system call. This is a standard system call, documented in the Linux
+ manual page for fcntl. Only one user can open the Audio Device in O_RDWR
+ mode. All other attempts to open the device in this mode will fail, and an error
+ code will be returned. If the Audio Device is opened in O_RDONLY mode, the
+ only ioctl call that can be used is AUDIO_GET_STATUS. All other call will
+ return with an error code.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int open(const char &#x22C6;deviceName, int flags);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>const char
+ *deviceName</para>
+</entry><entry
+ align="char">
+<para>Name of specific audio device.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int flags</para>
+</entry><entry
+ align="char">
+<para>A bit-wise OR of the following flags:</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDONLY read-only access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDWR read/write access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_NONBLOCK open in non-blocking mode</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>(blocking mode is the default)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>ENODEV</para>
+</entry><entry
+ align="char">
+<para>Device driver not loaded/available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBUSY</para>
+</entry><entry
+ align="char">
+<para>Device or resource busy.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Invalid argument.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+<section id="audio_fclose">
+<title>close()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call closes a previously opened audio device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int close(int fd);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+<section id="audio_fwrite">
+<title>write()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call can only be used if AUDIO_SOURCE_MEMORY is selected
+ in the ioctl call AUDIO_SELECT_SOURCE. The data provided shall be in
+ PES format. If O_NONBLOCK is not specified the function will block until
+ buffer space is available. The amount of data to be transferred is implied by
+ count.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>size_t write(int fd, const void &#x22C6;buf, size_t count);</para>
+</entry></