author Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/jfs/jfs_logmgr.c
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/jfs/jfs_logmgr.c')
-rw-r--r-- fs/jfs/jfs_logmgr.c | 2524
1 file changed, 2524 insertions(+), 0 deletions(-)
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
new file mode 100644
index 00000000000..b6a6869ebb4
--- /dev/null
+++ b/fs/jfs/jfs_logmgr.c
@@ -0,0 +1,2524 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2000-2004
+ * Portions Copyright (C) Christoph Hellwig, 2001-2002
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * jfs_logmgr.c: log manager
+ *
+ * for related information, see transaction manager (jfs_txnmgr.c), and
+ * recovery manager (jfs_logredo.c).
+ *
+ * note: for detail, RTFS.
+ *
+ * log buffer manager:
+ * special purpose buffer manager supporting log i/o requirements.
+ * per log serial pageout of logpage
+ * queuing i/o requests and redrive i/o at iodone
+ * maintain current logpage buffer
+ * no caching since append only
+ * appropriate jfs buffer cache buffers as needed
+ *
+ * group commit:
+ * transactions which wrote COMMIT records in the same in-memory
+ * log page during the pageout of previous/current log page(s) are
+ * committed together by the pageout of the page.
+ *
+ * TBD lazy commit:
+ * transactions are committed asynchronously when the log page
+ * containing its COMMIT record is paged out when it becomes full;
+ *
+ * serialization:
+ * . a per log lock serializes log writes.
+ * . a per log lock serializes group commit.
+ * . a per log lock serializes log open/close;
+ *
+ * TBD log integrity:
+ * careful-write (ping-pong) of last logpage to recover from crash
+ * in overwrite.
+ * detection of split (out-of-order) write of physical sectors
+ * of last logpage via timestamp at end of each sector
+ * (with its mirror data array at trailer).
+ *
+ * alternatives:
+ * lsn - 64-bit monotonically increasing integer vs
+ * 32-bit lspn and page eor.
+ */
+
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h> /* for sync_blockdev() */
+#include <linux/bio.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include "jfs_incore.h"
+#include "jfs_filsys.h"
+#include "jfs_metapage.h"
+#include "jfs_txnmgr.h"
+#include "jfs_debug.h"
+
+
+/*
+ * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread)
+ */
+static struct lbuf *log_redrive_list;
+static DEFINE_SPINLOCK(log_redrive_lock);
+DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
+
+
+/*
+ * log read/write serialization (per log)
+ */
+#define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock)
+#define LOG_LOCK(log) down(&((log)->loglock))
+#define LOG_UNLOCK(log) up(&((log)->loglock))
+
+
+/*
+ * log group commit serialization (per log)
+ */
+
+#define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)
+#define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)
+#define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)
+#define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)
+
+/*
+ * log sync serialization (per log)
+ */
+#define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE)
+#define LOGSYNC_BARRIER(logsize) ((logsize)/4)
+/*
+#define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE)
+#define LOGSYNC_BARRIER(logsize) ((logsize)/2)
+*/
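+
+/*
+ * worked numbers (assuming LOGPSIZE = 4KB): for a 32MB log,
+ * LOGSYNC_DELTA = min(32MB/8, 128*4KB) = 512KB of log written
+ * between SYNCPT records, and LOGSYNC_BARRIER = 8MB of log written
+ * since the last syncpt before new transactions are stalled
+ * (see lmLogSync()).
+ */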
+
+
+/*
+ * log buffer cache synchronization
+ */
+static DEFINE_SPINLOCK(jfsLCacheLock);
+
+#define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags)
+#define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags)
+
+/*
+ * See __SLEEP_COND in jfs_locks.h
+ */
+#define LCACHE_SLEEP_COND(wq, cond, flags) \
+do { \
+ if (cond) \
+ break; \
+ __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
+} while (0)
+
+#define LCACHE_WAKEUP(event) wake_up(event)
+
+
+/*
+ * lbuf buffer cache (lCache) control
+ */
+/* log buffer manager pageout control (cumulative, inclusive) */
+#define lbmREAD 0x0001
+#define lbmWRITE 0x0002 /* enqueue at tail of write queue;
+ * init pageout if at head of queue;
+ */
+#define lbmRELEASE 0x0004 /* remove from write queue
+ * at completion of pageout;
+ * do not free/recycle it yet:
+ * caller will free it;
+ */
+#define lbmSYNC 0x0008 /* do not return to freelist
+ * when removed from write queue;
+ */
+#define lbmFREE 0x0010 /* return to freelist
+ * at completion of pageout;
+ * the buffer may be recycled;
+ */
+#define lbmDONE 0x0020
+#define lbmERROR 0x0040
+#define lbmGC 0x0080 /* lbmIODone to perform post-GC processing
+ * of log page
+ */
+#define lbmDIRECT 0x0100
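+
+/*
+ * flag combinations used below, for orientation:
+ * lbmWRITE | lbmRELEASE | lbmFREE - full page: write, dequeue, recycle
+ * lbmWRITE | lbmRELEASE | lbmGC - full page under group commit
+ * lbmWRITE | lbmGC - partial page under group commit
+ * lbmWRITE | lbmSYNC - synchronous write; initiator waits in lbmIOWait()
+ */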
+
+/*
+ * Global list of active external journals
+ */
+static LIST_HEAD(jfs_external_logs);
+static struct jfs_log *dummy_log = NULL;
+static DECLARE_MUTEX(jfs_log_sem);
+
+/*
+ * external references
+ */
+extern void txLazyUnlock(struct tblock * tblk);
+extern int jfs_stop_threads;
+extern struct completion jfsIOwait;
+extern int jfs_tlocks_low;
+
+/*
+ * forward references
+ */
+static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
+ struct lrd * lrd, struct tlock * tlck);
+
+static int lmNextPage(struct jfs_log * log);
+static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
+ int activate);
+
+static int open_inline_log(struct super_block *sb);
+static int open_dummy_log(struct super_block *sb);
+static int lbmLogInit(struct jfs_log * log);
+static void lbmLogShutdown(struct jfs_log * log);
+static struct lbuf *lbmAllocate(struct jfs_log * log, int);
+static void lbmFree(struct lbuf * bp);
+static void lbmfree(struct lbuf * bp);
+static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
+static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
+static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
+static int lbmIOWait(struct lbuf * bp, int flag);
+static bio_end_io_t lbmIODone;
+static void lbmStartIO(struct lbuf * bp);
+static void lmGCwrite(struct jfs_log * log, int cant_block);
+static int lmLogSync(struct jfs_log * log, int nosyncwait);
+
+
+
+/*
+ * statistics
+ */
+#ifdef CONFIG_JFS_STATISTICS
+static struct lmStat {
+ uint commit; /* # of commit */
+ uint pagedone; /* # of page written */
+ uint submitted; /* # of pages submitted */
+ uint full_page; /* # of full pages submitted */
+ uint partial_page; /* # of partial pages submitted */
+} lmStat;
+#endif
+
+
+/*
+ * NAME: lmLog()
+ *
+ * FUNCTION: write a log record;
+ *
+ * PARAMETER: log - log descriptor;
+ * tblk - transaction block, or NULL for out-of-transaction log;
+ * lrd - log record descriptor;
+ * tlck - transaction lock, or NULL if no page data to log;
+ *
+ * RETURN: lsn - offset to the next log record to write (end-of-log);
+ * -1 - error;
+ *
+ * note: todo: log error handler
+ */
+int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
+ struct tlock * tlck)
+{
+ int lsn;
+ int diffp, difft;
+ struct metapage *mp = NULL;
+
+ jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
+ log, tblk, lrd, tlck);
+
+ LOG_LOCK(log);
+
+ /* log by (out-of-transaction) JFS ? */
+ if (tblk == NULL)
+ goto writeRecord;
+
+ /* log from page ? */
+ if (tlck == NULL ||
+ tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
+ goto writeRecord;
+
+ /*
+ * initialize/update page/transaction recovery lsn
+ */
+ lsn = log->lsn;
+
+ LOGSYNC_LOCK(log);
+
+ /*
+ * initialize page lsn if first log write of the page
+ */
+ if (mp->lsn == 0) {
+ mp->log = log;
+ mp->lsn = lsn;
+ log->count++;
+
+ /* insert page at tail of logsynclist */
+ list_add_tail(&mp->synclist, &log->synclist);
+ }
+
+ /*
+ * initialize/update lsn of tblock of the page
+ *
+ * transaction inherits oldest lsn of pages associated
+ * with allocation/deallocation of resources (their
+ * log records are used to reconstruct allocation map
+ * at recovery time: inode for inode allocation map,
+ * B+-tree index of extent descriptors for block
+ * allocation map);
+ * allocation map pages inherit transaction lsn at
+ * commit time to allow forwarding log syncpt past log
+ * records associated with allocation/deallocation of
+ * resources only after persistent map of these map pages
+ * have been updated and propagated to home.
+ */
+ /*
+ * initialize transaction lsn:
+ */
+ if (tblk->lsn == 0) {
+ /* inherit lsn of its first page logged */
+ tblk->lsn = mp->lsn;
+ log->count++;
+
+ /* insert tblock after the page on logsynclist */
+ list_add(&tblk->synclist, &mp->synclist);
+ }
+ /*
+ * update transaction lsn:
+ */
+ else {
+ /* inherit oldest/smallest lsn of page */
+ logdiff(diffp, mp->lsn, log);
+ logdiff(difft, tblk->lsn, log);
+ if (diffp < difft) {
+ /* update tblock lsn with page lsn */
+ tblk->lsn = mp->lsn;
+
+ /* move tblock after page on logsynclist */
+ list_move(&tblk->synclist, &mp->synclist);
+ }
+ }
+
+ LOGSYNC_UNLOCK(log);
+
+ /*
+ * write the log record
+ */
+ writeRecord:
+ lsn = lmWriteRecord(log, tblk, lrd, tlck);
+
+ /*
+ * forward log syncpt if log reached next syncpt trigger
+ */
+ logdiff(diffp, lsn, log);
+ if (diffp >= log->nextsync)
+ lsn = lmLogSync(log, 0);
+
+ /* update end-of-log lsn */
+ log->lsn = lsn;
+
+ LOG_UNLOCK(log);
+
+ /* return end-of-log address */
+ return lsn;
+}
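+
+/*
+ * note on logdiff() above: it yields the byte distance from the last
+ * sync point (log->syncpt) to the given lsn, wrapping around the
+ * circular log (see the macro in jfs_logmgr.h), so the test
+ * (diffp >= log->nextsync) fires once enough log has been written
+ * since the last SYNCPT record.
+ */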
+
+
+/*
+ * NAME: lmWriteRecord()
+ *
+ * FUNCTION: move the log record to current log page
+ *
+ * PARAMETER: log, tblk, lrd, tlck - as for lmLog()
+ *
+ * RETURN: end-of-log address
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int
+lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
+ struct tlock * tlck)
+{
+ int lsn = 0; /* end-of-log address */
+ struct lbuf *bp; /* dst log page buffer */
+ struct logpage *lp; /* dst log page */
+ caddr_t dst; /* destination address in log page */
+ int dstoffset; /* end-of-log offset in log page */
+ int freespace; /* free space in log page */
+ caddr_t p; /* src meta-data page */
+ caddr_t src;
+ int srclen;
+ int nbytes; /* number of bytes to move */
+ int i;
+ int len;
+ struct linelock *linelock;
+ struct lv *lv;
+ struct lvd *lvd;
+ int l2linesize;
+
+ len = 0;
+
+ /* retrieve destination log page to write */
+ bp = (struct lbuf *) log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ dstoffset = log->eor;
+
+ /* any log data to write ? */
+ if (tlck == NULL)
+ goto moveLrd;
+
+ /*
+ * move log record data
+ */
+ /* retrieve source meta-data page to log */
+ if (tlck->flag & tlckPAGELOCK) {
+ p = (caddr_t) (tlck->mp->data);
+ linelock = (struct linelock *) & tlck->lock;
+ }
+ /* retrieve source in-memory inode to log */
+ else if (tlck->flag & tlckINODELOCK) {
+ if (tlck->type & tlckDTREE)
+ p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
+ else
+ p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
+ linelock = (struct linelock *) & tlck->lock;
+ }
+#ifdef _JFS_WIP
+ else if (tlck->flag & tlckINLINELOCK) {
+
+ inlinelock = (struct inlinelock *) & tlck;
+ p = (caddr_t) & inlinelock->pxd;
+ linelock = (struct linelock *) & tlck;
+ }
+#endif /* _JFS_WIP */
+ else {
+ jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
+ return 0; /* Probably should trap */
+ }
+ l2linesize = linelock->l2linesize;
+
+ moveData:
+ ASSERT(linelock->index <= linelock->maxcnt);
+
+ lv = linelock->lv;
+ for (i = 0; i < linelock->index; i++, lv++) {
+ if (lv->length == 0)
+ continue;
+
+ /* is page full ? */
+ if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
+ /* page becomes full: move on to next page */
+ lmNextPage(log);
+
+ bp = log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ dstoffset = LOGPHDRSIZE;
+ }
+
+ /*
+ * move log vector data
+ */
+ src = (u8 *) p + (lv->offset << l2linesize);
+ srclen = lv->length << l2linesize;
+ len += srclen;
+ while (srclen > 0) {
+ freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
+ nbytes = min(freespace, srclen);
+ dst = (caddr_t) lp + dstoffset;
+ memcpy(dst, src, nbytes);
+ dstoffset += nbytes;
+
+ /* is page not full ? */
+ if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
+ break;
+
+ /* page becomes full: move on to next page */
+ lmNextPage(log);
+
+ bp = (struct lbuf *) log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ dstoffset = LOGPHDRSIZE;
+
+ srclen -= nbytes;
+ src += nbytes;
+ }
+
+ /*
+ * move log vector descriptor
+ */
+ len += 4;
+ lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
+ lvd->offset = cpu_to_le16(lv->offset);
+ lvd->length = cpu_to_le16(lv->length);
+ dstoffset += 4;
+ jfs_info("lmWriteRecord: lv offset:%d length:%d",
+ lv->offset, lv->length);
+ }
+
+ if ((i = linelock->next)) {
+ linelock = (struct linelock *) lid_to_tlock(i);
+ goto moveData;
+ }
+
+ /*
+ * move log record descriptor
+ */
+ moveLrd:
+ lrd->length = cpu_to_le16(len);
+
+ src = (caddr_t) lrd;
+ srclen = LOGRDSIZE;
+
+ while (srclen > 0) {
+ freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
+ nbytes = min(freespace, srclen);
+ dst = (caddr_t) lp + dstoffset;
+ memcpy(dst, src, nbytes);
+
+ dstoffset += nbytes;
+ srclen -= nbytes;
+
+ /* is there more to move than the free space of the page ? */
+ if (srclen)
+ goto pageFull;
+
+ /*
+ * end of log record descriptor
+ */
+
+ /* update last log record eor */
+ log->eor = dstoffset;
+ bp->l_eor = dstoffset;
+ lsn = (log->page << L2LOGPSIZE) + dstoffset;
+
+ if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
+ tblk->clsn = lsn;
+ jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
+ bp->l_eor);
+
+ INCREMENT(lmStat.commit); /* # of commit */
+
+ /*
+ * enqueue tblock for group commit:
+ *
+ * enqueue tblock of non-trivial/synchronous COMMIT
+ * at tail of group commit queue
+ * (trivial/asynchronous COMMITs are ignored by
+ * group commit.)
+ */
+ LOGGC_LOCK(log);
+
+ /* init tblock gc state */
+ tblk->flag = tblkGC_QUEUE;
+ tblk->bp = log->bp;
+ tblk->pn = log->page;
+ tblk->eor = log->eor;
+
+ /* enqueue transaction to commit queue */
+ list_add_tail(&tblk->cqueue, &log->cqueue);
+
+ LOGGC_UNLOCK(log);
+ }
+
+ jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
+ le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
+
+ /* page not full ? */
+ if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
+ return lsn;
+
+ pageFull:
+ /* page becomes full: move on to next page */
+ lmNextPage(log);
+
+ bp = (struct lbuf *) log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ dstoffset = LOGPHDRSIZE;
+ src += nbytes;
+ }
+
+ return lsn;
+}
+
+
+/*
+ * NAME: lmNextPage()
+ *
+ * FUNCTION: write current page and allocate next page.
+ *
+ * PARAMETER: log
+ *
+ * RETURN: 0
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int lmNextPage(struct jfs_log * log)
+{
+ struct logpage *lp;
+ int lspn; /* log sequence page number */
+ int pn; /* current page number */
+ struct lbuf *bp;
+ struct lbuf *nextbp;
+ struct tblock *tblk;
+
+ /* get current log page number and log sequence page number */
+ pn = log->page;
+ bp = log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ lspn = le32_to_cpu(lp->h.page);
+
+ LOGGC_LOCK(log);
+
+ /*
+ * write or queue the full page at the tail of write queue
+ */
+ /* get the tail tblk on commit queue */
+ if (list_empty(&log->cqueue))
+ tblk = NULL;
+ else
+ tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
+
+ /* every tblk that has a COMMIT record on the current page,
+ * and has not been committed, must be on the commit queue,
+ * since a tblk is queued at the commit queue at the time
+ * its COMMIT record is written to the page, before the
+ * page becomes full (even though the tblk thread
+ * that wrote the COMMIT record may currently be
+ * suspended);
+ */
+
+ /* is page bound with outstanding tail tblk ? */
+ if (tblk && tblk->pn == pn) {
+ /* mark tblk for end-of-page */
+ tblk->flag |= tblkGC_EOP;
+
+ if (log->cflag & logGC_PAGEOUT) {
+ /* if page is not already on write queue,
+ * just enqueue (no lbmWRITE to prevent redrive)
+ * buffer to wqueue to ensure correct serial order
+ * of the pages since log pages will be added
+ * continuously
+ */
+ if (bp->l_wqnext == NULL)
+ lbmWrite(log, bp, 0, 0);
+ } else {
+ /*
+ * No current GC leader, initiate group commit
+ */
+ log->cflag |= logGC_PAGEOUT;
+ lmGCwrite(log, 0);
+ }
+ }
+ /* page is not bound with outstanding tblk:
+ * init write or mark it to be redriven (lbmWRITE)
+ */
+ else {
+ /* finalize the page */
+ bp->l_ceor = bp->l_eor;
+ lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+ lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
+ }
+ LOGGC_UNLOCK(log);
+
+ /*
+ * allocate/initialize next page
+ */
+ /* if log wraps, the first data page of log is 2
+ * (0 never used, 1 is superblock).
+ */
+ log->page = (pn == log->size - 1) ? 2 : pn + 1;
+ log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
+
+ /* allocate/initialize next log page buffer */
+ nextbp = lbmAllocate(log, log->page);
+ nextbp->l_eor = log->eor;
+ log->bp = nextbp;
+
+ /* initialize next log page */
+ lp = (struct logpage *) nextbp->l_ldata;
+ lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
+ lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
+
+ return 0;
+}
+
+
+/*
+ * NAME: lmGroupCommit()
+ *
+ * FUNCTION: group commit
+ * initiate pageout of the pages with COMMIT in the order of
+ * page number - redrive pageout of the page at the head of
+ * pageout queue until full page has been written.
+ *
+ * RETURN: 0 - success; -EIO - pageout of the commit record failed
+ *
+ * NOTE:
+ * LOGGC_LOCK serializes log group commit queue, and
+ * transaction blocks on the commit queue.
+ * N.B. LOG_LOCK is NOT held during lmGroupCommit().
+ */
+int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
+{
+ int rc = 0;
+
+ LOGGC_LOCK(log);
+
+ /* group committed already ? */
+ if (tblk->flag & tblkGC_COMMITTED) {
+ if (tblk->flag & tblkGC_ERROR)
+ rc = -EIO;
+
+ LOGGC_UNLOCK(log);
+ return rc;
+ }
+ jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
+
+ if (tblk->xflag & COMMIT_LAZY)
+ tblk->flag |= tblkGC_LAZY;
+
+ if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
+ (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
+ || jfs_tlocks_low)) {
+ /*
+ * No pageout in progress
+ *
+ * start group commit as its group leader.
+ */
+ log->cflag |= logGC_PAGEOUT;
+
+ lmGCwrite(log, 0);
+ }
+
+ if (tblk->xflag & COMMIT_LAZY) {
+ /*
+ * Lazy transactions can leave now
+ */
+ LOGGC_UNLOCK(log);
+ return 0;
+ }
+
+ /* lmGCwrite gives up LOGGC_LOCK, check again */
+
+ if (tblk->flag & tblkGC_COMMITTED) {
+ if (tblk->flag & tblkGC_ERROR)
+ rc = -EIO;
+
+ LOGGC_UNLOCK(log);
+ return rc;
+ }
+
+ /* upcount transaction waiting for completion
+ */
+ log->gcrtc++;
+ tblk->flag |= tblkGC_READY;
+
+ __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
+ LOGGC_LOCK(log), LOGGC_UNLOCK(log));
+
+ /* removed from commit queue */
+ if (tblk->flag & tblkGC_ERROR)
+ rc = -EIO;
+
+ LOGGC_UNLOCK(log);
+ return rc;
+}
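+
+/*
+ * tblock group commit states, as driven by lmGroupCommit() above and
+ * lmGCwrite()/lmPostGC() below:
+ * tblkGC_QUEUE - COMMIT record written, tblk queued on log->cqueue
+ * tblkGC_READY - a waiter sleeps on tblk->gcwait for completion
+ * tblkGC_COMMIT - shipped with the current group pageout (lmGCwrite)
+ * tblkGC_COMMITTED - pageout reached disk; waiters woken (lmPostGC)
+ */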
+
+/*
+ * NAME: lmGCwrite()
+ *
+ * FUNCTION: group commit write
+ * initiate write of log page, building a group of all transactions
+ * with commit records on that page.
+ *
+ * RETURN: None
+ *
+ * NOTE:
+ * LOGGC_LOCK must be held by caller.
+ * N.B. LOG_LOCK is NOT held during lmGCwrite().
+ */
+static void lmGCwrite(struct jfs_log * log, int cant_write)
+{
+ struct lbuf *bp;
+ struct logpage *lp;
+ int gcpn; /* group commit page number */
+ struct tblock *tblk;
+ struct tblock *xtblk = NULL;
+
+ /*
+ * build the commit group of a log page
+ *
+ * scan commit queue and make a commit group of all
+ * transactions with COMMIT records on the same log page.
+ */
+ /* get the head tblk on the commit queue */
+ gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
+
+ list_for_each_entry(tblk, &log->cqueue, cqueue) {
+ if (tblk->pn != gcpn)
+ break;
+
+ xtblk = tblk;
+
+ /* state transition: (QUEUE, READY) -> COMMIT */
+ tblk->flag |= tblkGC_COMMIT;
+ }
+ tblk = xtblk; /* last tblk of the page */
+
+ /*
+ * pageout to commit transactions on the log page.
+ */
+ bp = (struct lbuf *) tblk->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ /* is page already full ? */
+ if (tblk->flag & tblkGC_EOP) {
+ /* mark page to free at end of group commit of the page */
+ tblk->flag &= ~tblkGC_EOP;
+ tblk->flag |= tblkGC_FREE;
+ bp->l_ceor = bp->l_eor;
+ lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+ lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
+ cant_write);
+ INCREMENT(lmStat.full_page);
+ }
+ /* page is not yet full */
+ else {
+ bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
+ lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+ lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
+ INCREMENT(lmStat.partial_page);
+ }
+}
+
+/*
+ * NAME: lmPostGC()
+ *
+ * FUNCTION: group commit post-processing
+ * Processes transactions after their commit records have been written
+ * to disk, redriving log I/O if necessary.
+ *
+ * RETURN: None
+ *
+ * NOTE:
+ * This routine is called at interrupt time by lbmIODone
+ */
+static void lmPostGC(struct lbuf * bp)
+{
+ unsigned long flags;
+ struct jfs_log *log = bp->l_log;
+ struct logpage *lp;
+ struct tblock *tblk, *temp;
+
+ //LOGGC_LOCK(log);
+ spin_lock_irqsave(&log->gclock, flags);
+ /*
+ * current pageout of group commit completed.
+ *
+ * remove/wakeup transactions from commit queue who were
+ * group committed with the current log page
+ */
+ list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
+ if (!(tblk->flag & tblkGC_COMMIT))
+ break;
+ /* if transaction was marked GC_COMMIT then
+ * it has been shipped in the current pageout
+ * and made it to disk - it is committed.
+ */
+
+ if (bp->l_flag & lbmERROR)
+ tblk->flag |= tblkGC_ERROR;
+
+ /* remove it from the commit queue */
+ list_del(&tblk->cqueue);
+ tblk->flag &= ~tblkGC_QUEUE;
+
+ if (tblk == log->flush_tblk) {
+ /* we can stop flushing the log now */
+ clear_bit(log_FLUSH, &log->flag);
+ log->flush_tblk = NULL;
+ }
+
+ jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
+ tblk->flag);
+
+ if (!(tblk->xflag & COMMIT_FORCE))
+ /*
+ * Hand tblk over to lazy commit thread
+ */
+ txLazyUnlock(tblk);
+ else {
+ /* state transition: COMMIT -> COMMITTED */
+ tblk->flag |= tblkGC_COMMITTED;
+
+ if (tblk->flag & tblkGC_READY)
+ log->gcrtc--;
+
+ LOGGC_WAKEUP(tblk);
+ }
+
+ /* was page full before pageout ?
+ * (and this is the last tblk bound with the page)
+ */
+ if (tblk->flag & tblkGC_FREE)
+ lbmFree(bp);
+ /* did page become full after pageout ?
+ * (and this is the last tblk bound with the page)
+ */
+ else if (tblk->flag & tblkGC_EOP) {
+ /* finalize the page */
+ lp = (struct logpage *) bp->l_ldata;
+ bp->l_ceor = bp->l_eor;
+ lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
+ jfs_info("lmPostGC: calling lbmWrite");
+ lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
+ 1);
+ }
+
+ }
+
+ /* are there any transactions that have entered lmGroupCommit()
+ * (whose COMMITs are after that of the last log page written)?
+ * They are waiting for a new group commit (above at (SLEEP 1)),
+ * or lazy transactions are on a full (queued) log page:
+ * select the latest ready transaction as the new group leader and
+ * wake it up to lead its group.
+ */
+ if ((!list_empty(&log->cqueue)) &&
+ ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
+ test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
+ /*
+ * Call lmGCwrite with new group leader
+ */
+ lmGCwrite(log, 1);
+
+ /* no transactions are ready yet (transactions are only just
+ * queued (GC_QUEUE) and have not entered group commit yet).
+ * the first transaction entering group commit
+ * will elect itself as the new group leader.
+ */
+ else
+ log->cflag &= ~logGC_PAGEOUT;
+
+ //LOGGC_UNLOCK(log);
+ spin_unlock_irqrestore(&log->gclock, flags);
+ return;
+}
+
+/*
+ * NAME: lmLogSync()
+ *
+ * FUNCTION: write log SYNCPT record for specified log
+ * if new sync address is available
+ * (normally the case if sync() is executed by a background
+ * process).
+ * if not, explicitly run jfs_blogsync() to initiate
+ * getting of new sync address.
+ * calculate new value of log->nextsync which determines when
+ * this code is called again.
+ *
+ * this is called only from lmLog().
+ *
+ * PARAMETER: log - log to write the SYNCPT record for;
+ * nosyncwait - nonzero when called from outside a
+ * transaction (e.g., sync())
+ *
+ * RETURN: lsn - new end-of-log address
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int lmLogSync(struct jfs_log * log, int nosyncwait)
+{
+ int logsize;
+ int written; /* written since last syncpt */
+ int free; /* free space left available */
+ int delta; /* additional delta to write normally */
+ int more; /* additional write granted */
+ struct lrd lrd;
+ int lsn;
+ struct logsyncblk *lp;
+
+ /*
+ * forward syncpt
+ */
+ /* if last sync is same as last syncpt,
+ * invoke sync point forward processing to update sync.
+ */
+
+ if (log->sync == log->syncpt) {
+ LOGSYNC_LOCK(log);
+ /* ToDo: push dirty metapages out to disk */
+// bmLogSync(log);
+
+ if (list_empty(&log->synclist))
+ log->sync = log->lsn;
+ else {
+ lp = list_entry(log->synclist.next,
+ struct logsyncblk, synclist);
+ log->sync = lp->lsn;
+ }
+ LOGSYNC_UNLOCK(log);
+
+ }
+
+ /* if sync is different from last syncpt,
+ * write a SYNCPT record with syncpt = sync.
+ * reset syncpt = sync
+ */
+ if (log->sync != log->syncpt) {
+ struct jfs_sb_info *sbi;
+
+ /*
+ * We need to make sure all of the "written" metapages
+ * actually make it to disk
+ */
+ list_for_each_entry(sbi, &log->sb_list, log_list) {
+ if (sbi->flag & JFS_NOINTEGRITY)
+ continue;
+ filemap_fdatawrite(sbi->ipbmap->i_mapping);
+ filemap_fdatawrite(sbi->ipimap->i_mapping);
+ filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping);
+ }
+ list_for_each_entry(sbi, &log->sb_list, log_list) {
+ if (sbi->flag & JFS_NOINTEGRITY)
+ continue;
+ filemap_fdatawait(sbi->ipbmap->i_mapping);
+ filemap_fdatawait(sbi->ipimap->i_mapping);
+ filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping);
+ }
+
+ lrd.logtid = 0;
+ lrd.backchain = 0;
+ lrd.type = cpu_to_le16(LOG_SYNCPT);
+ lrd.length = 0;
+ lrd.log.syncpt.sync = cpu_to_le32(log->sync);
+ lsn = lmWriteRecord(log, NULL, &lrd, NULL);
+
+ log->syncpt = log->sync;
+ } else
+ lsn = log->lsn;
+
+ /*
+ * setup next syncpt trigger (SWAG)
+ */
+ logsize = log->logsize;
+
+ logdiff(written, lsn, log);
+ free = logsize - written;
+ delta = LOGSYNC_DELTA(logsize);
+ more = min(free / 2, delta);
+ if (more < 2 * LOGPSIZE) {
+ jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
+ /*
+ * log wrapping
+ *
+ * option 1 - panic ? No!
+ * option 2 - shutdown file systems
+ * associated with log ?
+ * option 3 - extend log ?
+ */
+ /*
+ * option 4 - second chance
+ *
+ * mark log wrapped, and continue.
+ * when all active transactions are completed,
+ * mark log valid for recovery.
+ * if crashed during invalid state, log state
+ * implies invalid log, forcing fsck().
+ */
+ /* mark log state log wrap in log superblock */
+ /* log->state = LOGWRAP; */
+
+ /* reset sync point computation */
+ log->syncpt = log->sync = lsn;
+ log->nextsync = delta;
+ } else
+ /* next syncpt trigger = written + more */
+ log->nextsync = written + more;
+
+ /* return if lmLogSync() from outside of transaction, e.g., sync() */
+ if (nosyncwait)
+ return lsn;
+
+ /* if number of bytes written from last sync point is more
+ * than 1/4 of the log size, stop new transactions from
+ * starting until all current transactions are completed
+ * by setting syncbarrier flag.
+ */
+ if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
+ set_bit(log_SYNCBARRIER, &log->flag);
+ jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
+ log->syncpt);
+ /*
+ * We may have to initiate group commit
+ */
+ jfs_flush_journal(log, 0);
+ }
+
+ return lsn;
+}
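+
+/*
+ * worked example of the trigger math above (assuming LOGPSIZE = 4KB):
+ * logsize = 32MB with 1MB written since the last syncpt gives
+ * free = 31MB, delta = min(32MB/8, 512KB) = 512KB, and
+ * more = min(free/2, delta) = 512KB >= 2*LOGPSIZE, so the next
+ * syncpt triggers at written + more = 1.5MB past the sync point.
+ */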
+
+
+/*
+ * NAME: lmLogOpen()
+ *
+ * FUNCTION: open the log on first open;
+ * insert filesystem in the active list of the log.
+ *
+ * PARAMETER: sb - superblock of the file system being mounted
+ *
+ * RETURN: 0 - success; errors from subroutines
+ *
+ * serialization:
+ */
+int lmLogOpen(struct super_block *sb)
+{
+ int rc;
+ struct block_device *bdev;
+ struct jfs_log *log;
+ struct jfs_sb_info *sbi = JFS_SBI(sb);
+
+ if (sbi->flag & JFS_NOINTEGRITY)
+ return open_dummy_log(sb);
+
+ if (sbi->mntflag & JFS_INLINELOG)
+ return open_inline_log(sb);
+
+ down(&jfs_log_sem);
+ list_for_each_entry(log, &jfs_external_logs, journal_list) {
+ if (log->bdev->bd_dev == sbi->logdev) {
+ if (memcmp(log->uuid, sbi->loguuid,
+ sizeof(log->uuid))) {
+ jfs_warn("wrong uuid on JFS journal\n");
+ up(&jfs_log_sem);
+ return -EINVAL;
+ }
+ /*
+ * add file system to log active file system list
+ */
+ if ((rc = lmLogFileSystem(log, sbi, 1))) {
+ up(&jfs_log_sem);
+ return rc;
+ }
+ goto journal_found;
+ }
+ }
+
+ if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
+ up(&jfs_log_sem);
+ return -ENOMEM;
+ }
+ memset(log, 0, sizeof(struct jfs_log));
+ INIT_LIST_HEAD(&log->sb_list);
+ init_waitqueue_head(&log->syncwait);
+
+ /*
+ * external log as separate logical volume
+ *
+ * file systems may share the log in an n-to-1 relationship;
+ */
+
+ bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE);
+ if (IS_ERR(bdev)) {
+ rc = PTR_ERR(bdev);
+ goto free;
+ }
+
+ if ((rc = bd_claim(bdev, log))) {
+ goto close;
+ }
+
+ log->bdev = bdev;
+ memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
+
+ /*
+ * initialize log:
+ */
+ if ((rc = lmLogInit(log)))
+ goto unclaim;
+
+ list_add(&log->journal_list, &jfs_external_logs);
+
+ /*
+ * add file system to log active file system list
+ */
+ if ((rc = lmLogFileSystem(log, sbi, 1)))
+ goto shutdown;
+
+journal_found:
+ LOG_LOCK(log);
+ list_add(&sbi->log_list, &log->sb_list);
+ sbi->log = log;
+ LOG_UNLOCK(log);
+
+ up(&jfs_log_sem);
+ return 0;
+
+ /*
+ * unwind on error
+ */
+ shutdown: /* unwind lbmLogInit() */
+ list_del(&log->journal_list);
+ lbmLogShutdown(log);
+
+ unclaim:
+ bd_release(bdev);
+
+ close: /* close external log device */
+ blkdev_put(bdev);
+
+ free: /* free log descriptor */
+ up(&jfs_log_sem);
+ kfree(log);
+
+ jfs_warn("lmLogOpen: exit(%d)", rc);
+ return rc;
+}
+
+static int open_inline_log(struct super_block *sb)
+{
+ struct jfs_log *log;
+ int rc;
+
+ if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
+ return -ENOMEM;
+ memset(log, 0, sizeof(struct jfs_log));
+ INIT_LIST_HEAD(&log->sb_list);
+ init_waitqueue_head(&log->syncwait);
+
+ set_bit(log_INLINELOG, &log->flag);
+ log->bdev = sb->s_bdev;
+ log->base = addressPXD(&JFS_SBI(sb)->logpxd);
+ log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
+ (L2LOGPSIZE - sb->s_blocksize_bits);
+ log->l2bsize = sb->s_blocksize_bits;
+ ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
+
+ /*
+ * initialize log.
+ */
+ if ((rc = lmLogInit(log))) {
+ kfree(log);
+ jfs_warn("lmLogOpen: exit(%d)", rc);
+ return rc;
+ }
+
+ list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
+ JFS_SBI(sb)->log = log;
+
+ return rc;
+}
+
+static int open_dummy_log(struct super_block *sb)
+{
+ int rc;
+
+ down(&jfs_log_sem);
+ if (!dummy_log) {
+ dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL);
+ if (!dummy_log) {
+ up(&jfs_log_sem);
+ return -ENOMEM;
+ }
+ memset(dummy_log, 0, sizeof(struct jfs_log));
+ INIT_LIST_HEAD(&dummy_log->sb_list);
+ init_waitqueue_head(&dummy_log->syncwait);
+ dummy_log->no_integrity = 1;
+ /* Make up some stuff */
+ dummy_log->base = 0;
+ dummy_log->size = 1024;
+ rc = lmLogInit(dummy_log);
+ if (rc) {
+ kfree(dummy_log);
+ dummy_log = NULL;
+ up(&jfs_log_sem);
+ return rc;
+ }
+ }
+
+ LOG_LOCK(dummy_log);
+ list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
+ JFS_SBI(sb)->log = dummy_log;
+ LOG_UNLOCK(dummy_log);
+ up(&jfs_log_sem);
+
+ return 0;
+}
+
+/*
+ * NAME: lmLogInit()
+ *
+ * FUNCTION: log initialization at first log open.
+ *
+ * logredo() (or logformat()) should have been run previously.
+ * initialize the log from log superblock.
+ * set the log state in the superblock to LOGMOUNT and
+ * write SYNCPT log record.
+ *
+ * PARAMETER: log - log structure
+ *
+ * RETURN: 0 - if ok
+ * -EINVAL - bad log magic number or superblock dirty
+ * error returned from logwait()
+ *
+ * serialization: single first open thread
+ */
+int lmLogInit(struct jfs_log * log)
+{
+ int rc = 0;
+ struct lrd lrd;
+ struct logsuper *logsuper;
+ struct lbuf *bpsuper;
+ struct lbuf *bp;
+ struct logpage *lp;
+ int lsn = 0;
+
+ jfs_info("lmLogInit: log:0x%p", log);
+
+ /* initialize the group commit serialization lock */
+ LOGGC_LOCK_INIT(log);
+
+ /* allocate/initialize the log write serialization lock */
+ LOG_LOCK_INIT(log);
+
+ LOGSYNC_LOCK_INIT(log);
+
+ INIT_LIST_HEAD(&log->synclist);
+
+ INIT_LIST_HEAD(&log->cqueue);
+ log->flush_tblk = NULL;
+
+ log->count = 0;
+
+ /*
+ * initialize log i/o
+ */
+ if ((rc = lbmLogInit(log)))
+ return rc;
+
+ if (!test_bit(log_INLINELOG, &log->flag))
+ log->l2bsize = L2LOGPSIZE;
+
+ /* check for disabled journaling to disk */
+ if (log->no_integrity) {
+ /*
+ * Journal pages will still be filled. When the time comes
+ * to actually do the I/O, the write is not done, and the
+ * endio routine is called directly.
+ */
+ bp = lbmAllocate(log , 0);
+ log->bp = bp;
+ bp->l_pn = bp->l_eor = 0;
+ } else {
+ /*
+ * validate log superblock
+ */
+ if ((rc = lbmRead(log, 1, &bpsuper)))
+ goto errout10;
+
+ logsuper = (struct logsuper *) bpsuper->l_ldata;
+
+ if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
+ jfs_warn("*** Log Format Error ! ***");
+ rc = -EINVAL;
+ goto errout20;
+ }
+
+ /* logredo() should have been run successfully. */
+ if (logsuper->state != cpu_to_le32(LOGREDONE)) {
+ jfs_warn("*** Log Is Dirty ! ***");
+ rc = -EINVAL;
+ goto errout20;
+ }
+
+ /* initialize log from log superblock */
+ if (test_bit(log_INLINELOG,&log->flag)) {
+ if (log->size != le32_to_cpu(logsuper->size)) {
+ rc = -EINVAL;
+ goto errout20;
+ }
+ jfs_info("lmLogInit: inline log:0x%p base:0x%Lx "
+ "size:0x%x", log,
+ (unsigned long long) log->base, log->size);
+ } else {
+ if (memcmp(logsuper->uuid, log->uuid, 16)) {
+ jfs_warn("wrong uuid on JFS log device");
+ rc = -EINVAL;
+ goto errout20;
+ }
+ log->size = le32_to_cpu(logsuper->size);
+ log->l2bsize = le32_to_cpu(logsuper->l2bsize);
+ jfs_info("lmLogInit: external log:0x%p base:0x%Lx "
+ "size:0x%x", log,
+ (unsigned long long) log->base, log->size);
+ }
+
+ log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
+ log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
+
+ /*
+ * initialize for log append write mode
+ */
+ /* establish current/end-of-log page/buffer */
+ if ((rc = lbmRead(log, log->page, &bp)))
+ goto errout20;
+
+ lp = (struct logpage *) bp->l_ldata;
+
+ jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
+ le32_to_cpu(logsuper->end), log->page, log->eor,
+ le16_to_cpu(lp->h.eor));
+
+ log->bp = bp;
+ bp->l_pn = log->page;
+ bp->l_eor = log->eor;
+
+ /* if current page is full, move on to next page */
+ if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
+ lmNextPage(log);
+
+ /*
+ * initialize log syncpoint
+ */
+ /*
+ * write the first SYNCPT record with syncpoint = 0
+ * (i.e., log redo up to HERE !);
+ * remove current page from lbm write queue at end of pageout
+ * (to write log superblock update), but do not release to
+ * freelist;
+ */
+ lrd.logtid = 0;
+ lrd.backchain = 0;
+ lrd.type = cpu_to_le16(LOG_SYNCPT);
+ lrd.length = 0;
+ lrd.log.syncpt.sync = 0;
+ lsn = lmWriteRecord(log, NULL, &lrd, NULL);
+ bp = log->bp;
+ bp->l_ceor = bp->l_eor;
+ lp = (struct logpage *) bp->l_ldata;
+ lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
+ lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
+ if ((rc = lbmIOWait(bp, 0)))
+ goto errout30;
+
+ /*
+ * update/write superblock
+ */
+ logsuper->state = cpu_to_le32(LOGMOUNT);
+ log->serial = le32_to_cpu(logsuper->serial) + 1;
+ logsuper->serial = cpu_to_le32(log->serial);
+ lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
+ if ((rc = lbmIOWait(bpsuper, lbmFREE)))
+ goto errout30;
+ }
+
+ /* initialize logsync parameters */
+ log->logsize = (log->size - 2) << L2LOGPSIZE;
+ log->lsn = lsn;
+ log->syncpt = lsn;
+ log->sync = log->syncpt;
+ log->nextsync = LOGSYNC_DELTA(log->logsize);
+
+ jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
+ log->lsn, log->syncpt, log->sync);
+
+ /*
+ * initialize for lazy/group commit
+ */
+ log->clsn = lsn;
+
+ return 0;
+
+ /*
+ * unwind on error
+ */
+ errout30: /* release log page */
+ log->wqueue = NULL;
+ bp->l_wqnext = NULL;
+ lbmFree(bp);
+
+ errout20: /* release log superblock */
+ lbmFree(bpsuper);
+
+ errout10: /* unwind lbmLogInit() */
+ lbmLogShutdown(log);
+
+ jfs_warn("lmLogInit: exit(%d)", rc);
+ return rc;
+}
+
+
+/*
+ * NAME: lmLogClose()
+ *
+ * FUNCTION: remove the file system from the active list of the log
+ * and close the log on last close.
+ *
+ * PARAMETER: sb - superblock
+ *
+ * RETURN: errors from subroutines
+ *
+ * serialization:
+ */
+int lmLogClose(struct super_block *sb)
+{
+ struct jfs_sb_info *sbi = JFS_SBI(sb);
+ struct jfs_log *log = sbi->log;
+ struct block_device *bdev;
+ int rc = 0;
+
+ jfs_info("lmLogClose: log:0x%p", log);
+
+ down(&jfs_log_sem);
+ LOG_LOCK(log);
+ list_del(&sbi->log_list);
+ LOG_UNLOCK(log);
+ sbi->log = NULL;
+
+ /*
+ * We need to make sure all of the "written" metapages
+ * actually make it to disk
+ */
+ sync_blockdev(sb->s_bdev);
+
+ if (test_bit(log_INLINELOG, &log->flag)) {
+ /*
+ * in-line log in host file system
+ */
+ rc = lmLogShutdown(log);
+ kfree(log);
+ goto out;
+ }
+
+ if (!log->no_integrity)
+ lmLogFileSystem(log, sbi, 0);
+
+ if (!list_empty(&log->sb_list))
+ goto out;
+
+ /*
+ * TODO: ensure that the dummy_log is in a state to allow
+ * lbmLogShutdown to deallocate all the buffers and call
+ * kfree against dummy_log. For now, leave dummy_log & its
+ * buffers in memory, and reuse them if another no-integrity mount
+ * is requested.
+ */
+ if (log->no_integrity)
+ goto out;
+
+ /*
+ * external log as separate logical volume
+ */
+ list_del(&log->journal_list);
+ bdev = log->bdev;
+ rc = lmLogShutdown(log);
+
+ bd_release(bdev);
+ blkdev_put(bdev);
+
+ kfree(log);
+
+ out:
+ up(&jfs_log_sem);
+ jfs_info("lmLogClose: exit(%d)", rc);
+ return rc;
+}
+
+
+/*
+ * NAME: jfs_flush_journal()
+ *
+ * FUNCTION: initiate write of any outstanding transactions to the journal
+ * and optionally wait until they are all written to disk
+ *
+ * wait == 0 flush until latest txn is committed, don't wait
+ * wait == 1 flush until latest txn is committed, wait
+ * wait > 1 flush until all txn's are complete, wait
+ */
+void jfs_flush_journal(struct jfs_log *log, int wait)
+{
+ int i;
+ struct tblock *target = NULL;
+
+ /* jfs_write_inode may call us during read-only mount */
+ if (!log)
+ return;
+
+ jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
+
+ LOGGC_LOCK(log);
+
+ if (!list_empty(&log->cqueue)) {
+ /*
+ * This ensures that we will keep writing to the journal as long
+ * as there are unwritten commit records
+ */
+ target = list_entry(log->cqueue.prev, struct tblock, cqueue);
+
+ if (test_bit(log_FLUSH, &log->flag)) {
+ /*
+ * We're already flushing.
+ * if flush_tblk is NULL, we are flushing everything,
+ * so leave it that way. Otherwise, update it to the
+ * latest transaction
+ */
+ if (log->flush_tblk)
+ log->flush_tblk = target;
+ } else {
+ /* Only flush until latest transaction is committed */
+ log->flush_tblk = target;
+ set_bit(log_FLUSH, &log->flag);
+
+ /*
+ * Initiate I/O on outstanding transactions
+ */
+ if (!(log->cflag & logGC_PAGEOUT)) {
+ log->cflag |= logGC_PAGEOUT;
+ lmGCwrite(log, 0);
+ }
+ }
+ }
+ if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
+ /* Flush until all activity complete */
+ set_bit(log_FLUSH, &log->flag);
+ log->flush_tblk = NULL;
+ }
+
+ if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
+ DECLARE_WAITQUEUE(__wait, current);
+
+ add_wait_queue(&target->gcwait, &__wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ LOGGC_UNLOCK(log);
+ schedule();
+ current->state = TASK_RUNNING;
+ LOGGC_LOCK(log);
+ remove_wait_queue(&target->gcwait, &__wait);
+ }
+ LOGGC_UNLOCK(log);
+
+ if (wait < 2)
+ return;
+
+ /*
+ * If there was recent activity, we may need to wait
+ * for the lazycommit thread to catch up
+ */
+ if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
+ for (i = 0; i < 800; i++) { /* Too much? */
+ msleep(250);
+ if (list_empty(&log->cqueue) &&
+ list_empty(&log->synclist))
+ break;
+ }
+ }
+ assert(list_empty(&log->cqueue));
+ assert(list_empty(&log->synclist));
+ clear_bit(log_FLUSH, &log->flag);
+}
+
+/*
+ * NAME: lmLogShutdown()
+ *
+ * FUNCTION: log shutdown at last LogClose().
+ *
+ * write log syncpt record.
+ * update super block to set redone flag to 0.
+ *
+ * PARAMETER: log - log inode
+ *
+ * RETURN: 0 - success
+ *
+ * serialization: single last close thread
+ */
+int lmLogShutdown(struct jfs_log * log)
+{
+ int rc;
+ struct lrd lrd;
+ int lsn;
+ struct logsuper *logsuper;
+ struct lbuf *bpsuper;
+ struct lbuf *bp;
+ struct logpage *lp;
+
+ jfs_info("lmLogShutdown: log:0x%p", log);
+
+ jfs_flush_journal(log, 2);
+
+ /*
+ * write the last SYNCPT record with syncpoint = 0
+ * (i.e., log redo up to HERE !)
+ */
+ lrd.logtid = 0;
+ lrd.backchain = 0;
+ lrd.type = cpu_to_le16(LOG_SYNCPT);
+ lrd.length = 0;
+ lrd.log.syncpt.sync = 0;
+
+ lsn = lmWriteRecord(log, NULL, &lrd, NULL);
+ bp = log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
+ lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
+ lbmIOWait(log->bp, lbmFREE);
+
+ /*
+ * synchronous update log superblock
+ * mark log state as shutdown cleanly
+ * (i.e., Log does not need to be replayed).
+ */
+ if ((rc = lbmRead(log, 1, &bpsuper)))
+ goto out;
+
+ logsuper = (struct logsuper *) bpsuper->l_ldata;
+ logsuper->state = cpu_to_le32(LOGREDONE);
+ logsuper->end = cpu_to_le32(lsn);
+ lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
+ rc = lbmIOWait(bpsuper, lbmFREE);
+
+ jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
+ lsn, log->page, log->eor);
+
+ out:
+ /*
+ * shutdown per log i/o
+ */
+ lbmLogShutdown(log);
+
+ if (rc) {
+ jfs_warn("lmLogShutdown: exit(%d)", rc);
+ }
+ return rc;
+}
+
+
+/*
+ * NAME: lmLogFileSystem()
+ *
+ * FUNCTION: insert (<activate> = true)/remove (<activate> = false)
+ * file system into/from log active file system list.
+ *
+ * PARAMETER: log - pointer to log descriptor.
+ * sbi - superblock info of the file system.
+ * activate - insert/remove device from active list.
+ *
+ * RETURN: 0 - success
+ * errors returned by lbmRead()/lbmIOWait().
+ */
+static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
+ int activate)
+{
+ int rc = 0;
+ int i;
+ struct logsuper *logsuper;
+ struct lbuf *bpsuper;
+ char *uuid = sbi->uuid;
+
+ /*
+ * insert/remove file system device to log active file system list.
+ */
+ if ((rc = lbmRead(log, 1, &bpsuper)))
+ return rc;
+
+ logsuper = (struct logsuper *) bpsuper->l_ldata;
+ if (activate) {
+ for (i = 0; i < MAX_ACTIVE; i++)
+ if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
+ memcpy(logsuper->active[i].uuid, uuid, 16);
+ sbi->aggregate = i;
+ break;
+ }
+ if (i == MAX_ACTIVE) {
+ jfs_warn("Too many file systems sharing journal!");
+ lbmFree(bpsuper);
+ return -EMFILE; /* Is there a better rc? */
+ }
+ } else {
+ for (i = 0; i < MAX_ACTIVE; i++)
+ if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
+ memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
+ break;
+ }
+ if (i == MAX_ACTIVE) {
+ jfs_warn("Somebody stomped on the journal!");
+ lbmFree(bpsuper);
+ return -EIO;
+ }
+
+ }
+
+ /*
+ * synchronous write log superblock:
+ *
+ * write sidestream bypassing write queue:
+ * at file system mount, log super block is updated for
+ * activation of the file system before any log record
+ * (MOUNT record) of the file system, and at file system
+ * unmount, all meta data for the file system has been
+ * flushed before log super block is updated for deactivation
+ * of the file system.
+ */
+ lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
+ rc = lbmIOWait(bpsuper, lbmFREE);
+
+ return rc;
+}
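+
+/*
+ * note on the active list manipulated above: logsuper->active[] holds
+ * one uuid per file system sharing this log (MAX_ACTIVE slots); a slot
+ * matching NULL_UUID (presumably the all-zero uuid) is free.
+ * activation claims the first free slot, deactivation clears the
+ * matching one.
+ */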
+
+/*
+ * log buffer manager (lbm)
+ * ------------------------
+ *
+ * special purpose buffer manager supporting log i/o requirements.
+ *
+ * per log write queue:
+ * log pageout occurs in serial order via a fifo write queue,
+ * restricted to a single i/o in progress at any one time:
+ * a circular singly-linked list
+ * (log->wqueue points to the tail, and buffers are linked via
+ * the bp->l_wqnext field) maintains the log pages in pageout
+ * or waiting for pageout, in serial pageout order.
+ */
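+
+/*
+ * write queue shape (tail pointer, circular; see lbmWrite()):
+ *
+ * log->wqueue --> [tail] --l_wqnext--> [head] --> ... --> [tail]
+ *
+ * head = log->wqueue->l_wqnext; a single-element queue points to
+ * itself (bp->l_wqnext == bp).
+ */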
+
+/*
+ * lbmLogInit()
+ *
+ * initialize per log I/O setup at lmLogInit()
+ */
+static int lbmLogInit(struct jfs_log * log)
+{ /* log inode */
+ int i;
+ struct lbuf *lbuf;
+
+ jfs_info("lbmLogInit: log:0x%p", log);
+
+ /* initialize current buffer cursor */
+ log->bp = NULL;
+
+ /* initialize log device write queue */
+ log->wqueue = NULL;
+
+ /*
+ * Each log has its own buffer pages allocated to it. These are
+ * not managed by the page cache. This ensures that a transaction
+ * writing to the log does not block trying to allocate a page from
+ * the page cache (for the log). This would be bad, since page
+ * allocation waits on the kswapd thread that may be committing inodes
+ * which would cause log activity. Was that clear? I'm trying to
+ * avoid deadlock here.
+ */
+ init_waitqueue_head(&log->free_wait);
+
+ log->lbuf_free = NULL;
+
+ for (i = 0; i < LOGPAGES; i++) {
+ lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
+ if (lbuf == NULL)
+ goto error;
+ lbuf->l_ldata = (char *) get_zeroed_page(GFP_KERNEL);
+ if (lbuf->l_ldata == NULL) {
+ kfree(lbuf);
+ goto error;
+ }
+ lbuf->l_log = log;
+ init_waitqueue_head(&lbuf->l_ioevent);
+
+ lbuf->l_freelist = log->lbuf_free;
+ log->lbuf_free = lbuf;
+ }
+
+ return (0);
+
+ error:
+ lbmLogShutdown(log);
+ return -ENOMEM;
+}
+
+
+/*
+ * lbmLogShutdown()
+ *
+ * finalize per log I/O setup at lmLogShutdown()
+ */
+static void lbmLogShutdown(struct jfs_log * log)
+{
+ struct lbuf *lbuf;
+
+ jfs_info("lbmLogShutdown: log:0x%p", log);
+
+ lbuf = log->lbuf_free;
+ while (lbuf) {
+ struct lbuf *next = lbuf->l_freelist;
+ free_page((unsigned long) lbuf->l_ldata);
+ kfree(lbuf);
+ lbuf = next;
+ }
+
+ log->bp = NULL;
+}
+
+
+/*
+ * lbmAllocate()
+ *
+ * allocate an empty log buffer
+ */
+static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
+{
+ struct lbuf *bp;
+ unsigned long flags;
+
+ /*
+ * recycle from log buffer freelist if any
+ */
+ LCACHE_LOCK(flags);
+ LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
+ log->lbuf_free = bp->l_freelist;
+ LCACHE_UNLOCK(flags);
+
+ bp->l_flag = 0;
+
+ bp->l_wqnext = NULL;
+ bp->l_freelist = NULL;
+
+ bp->l_pn = pn;
+ bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
+ bp->l_ceor = 0;
+
+ return bp;
+}
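+
+/*
+ * l_blkno arithmetic above: pn is a log page number; shifting by
+ * (L2LOGPSIZE - log->l2bsize) converts log pages to device blocks
+ * (e.g., 4KB log pages over 512-byte blocks: shift of 3, 8 blocks
+ * per page), and log->base is the block offset of the log on the
+ * volume.
+ */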
+
+
+/*
+ * lbmFree()
+ *
+ * release a log buffer to freelist
+ */
+static void lbmFree(struct lbuf * bp)
+{
+ unsigned long flags;
+
+ LCACHE_LOCK(flags);
+
+ lbmfree(bp);
+
+ LCACHE_UNLOCK(flags);
+}
+
+static void lbmfree(struct lbuf * bp)
+{
+ struct jfs_log *log = bp->l_log;
+
+ assert(bp->l_wqnext == NULL);
+
+ /*
+ * return the buffer to head of freelist
+ */
+ bp->l_freelist = log->lbuf_free;
+ log->lbuf_free = bp;
+
+ wake_up(&log->free_wait);
+ return;
+}
+
+
+/*
+ * NAME: lbmRedrive
+ *
+ * FUNCTION: add a log buffer to the log redrive list
+ *
+ * PARAMETER:
+ * bp - log buffer
+ *
+ * NOTES:
+ * Takes log_redrive_lock.
+ */
+static inline void lbmRedrive(struct lbuf *bp)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&log_redrive_lock, flags);
+ bp->l_redrive_next = log_redrive_list;
+ log_redrive_list = bp;
+ spin_unlock_irqrestore(&log_redrive_lock, flags);
+
+ wake_up(&jfs_IO_thread_wait);
+}
+
+
+/*
+ * lbmRead()
+ */
+static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
+{
+ struct bio *bio;
+ struct lbuf *bp;
+
+ /*
+ * allocate a log buffer
+ */
+ *bpp = bp = lbmAllocate(log, pn);
+ jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
+
+ bp->l_flag |= lbmREAD;
+
+ bio = bio_alloc(GFP_NOFS, 1);
+
+ bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
+ bio->bi_bdev = log->bdev;
+ bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata);
+ bio->bi_io_vec[0].bv_len = LOGPSIZE;
+ bio->bi_io_vec[0].bv_offset = 0;
+
+ bio->bi_vcnt = 1;
+ bio->bi_idx = 0;
+ bio->bi_size = LOGPSIZE;
+
+ bio->bi_end_io = lbmIODone;
+ bio->bi_private = bp;
+ submit_bio(READ_SYNC, bio);
+
+ wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
+
+ return 0;
+}
+
+
+/*
+ * lbmWrite()
+ *
+ * buffer at head of pageout queue stays after completion of
+ * partial-page pageout and redriven by explicit initiation of
+ * pageout by caller until full-page pageout is completed and
+ * released.
+ *
+ * device driver i/o done redrives pageout of new buffer at
+ * head of pageout queue when current buffer at head of pageout
+ * queue is released at the completion of its full-page pageout.
+ *
+ * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
+ * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
+ */
+static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
+ int cant_block)
+{
+ struct lbuf *tail;
+ unsigned long flags;
+
+ jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
+
+ /* map the logical block address to physical block address */
+ bp->l_blkno =
+ log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
+
+ LCACHE_LOCK(flags); /* disable+lock */
+
+ /*
+ * initialize buffer for device driver
+ */
+ bp->l_flag = flag;
+
+ /*
+ * insert bp at tail of write queue associated with log
+ *
+ * (request is either for bp already/currently at head of queue
+ * or new bp to be inserted at tail)
+ */
+ tail = log->wqueue;
+
+ /* is buffer not already on write queue ? */
+ if (bp->l_wqnext == NULL) {
+ /* insert at tail of wqueue */
+ if (tail == NULL) {
+ log->wqueue = bp;
+ bp->l_wqnext = bp;
+ } else {
+ log->wqueue = bp;
+ bp->l_wqnext = tail->l_wqnext;
+ tail->l_wqnext = bp;
+ }
+
+ tail = bp;
+ }
+
+ /* is buffer at head of wqueue and for write ? */
+ if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
+ LCACHE_UNLOCK(flags); /* unlock+enable */
+ return;
+ }
+
+ LCACHE_UNLOCK(flags); /* unlock+enable */
+
+ if (cant_block)
+ lbmRedrive(bp);
+ else if (flag & lbmSYNC)
+ lbmStartIO(bp);
+ else {
+ LOGGC_UNLOCK(log);
+ lbmStartIO(bp);
+ LOGGC_LOCK(log);
+ }
+}
+
+
+/*
+ * lbmDirectWrite()
+ *
+ * initiate pageout bypassing write queue for sidestream
+ * (e.g., log superblock) write;
+ */
+static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
+{
+ jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
+ bp, flag, bp->l_pn);
+
+ /*
+ * initialize buffer for device driver
+ */
+ bp->l_flag = flag | lbmDIRECT;
+
+ /* map the logical block address to physical block address */
+ bp->l_blkno =
+ log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
+
+ /*
+ * initiate pageout of the page
+ */
+ lbmStartIO(bp);
+}
+
+
+/*
+ * NAME: lbmStartIO()
+ *
+ * FUNCTION: Interface to DD strategy routine
+ *
+ * RETURN: none
+ *
+ * serialization: LCACHE_LOCK() is NOT held during log i/o;
+ */
+static void lbmStartIO(struct lbuf * bp)
+{
+ struct bio *bio;
+ struct jfs_log *log = bp->l_log;
+
+ jfs_info("lbmStartIO\n");
+
+ bio = bio_alloc(GFP_NOFS, 1);
+ bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
+ bio->bi_bdev = log->bdev;
+ bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata);
+ bio->bi_io_vec[0].bv_len = LOGPSIZE;
+ bio->bi_io_vec[0].bv_offset = 0;
+
+ bio->bi_vcnt = 1;
+ bio->bi_idx = 0;
+ bio->bi_size = LOGPSIZE;
+
+ bio->bi_end_io = lbmIODone;
+ bio->bi_private = bp;
+
+ /* check if journaling to disk has been disabled */
+ if (!log->no_integrity) {
+ submit_bio(WRITE_SYNC, bio);
+ INCREMENT(lmStat.submitted);
+ }
+ else {
+ bio->bi_size = 0;
+ lbmIODone(bio, 0, 0); /* 2nd argument appears to not be used => 0
+ * 3rd argument appears to not be used => 0
+ */
+ }
+}
+
+
+/*
+ * lbmIOWait()
+ */
+static int lbmIOWait(struct lbuf * bp, int flag)
+{
+ unsigned long flags;
+ int rc = 0;
+
+ jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
+
+ LCACHE_LOCK(flags); /* disable+lock */
+
+ LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
+
+ rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
+
+ if (flag & lbmFREE)
+ lbmfree(bp);
+
+ LCACHE_UNLOCK(flags); /* unlock+enable */
+
+ jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
+ return rc;
+}
+
+/*
+ * lbmIODone()
+ *
+ * executed at INTIODONE level
+ */
+static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
+{
+ struct lbuf *bp = bio->bi_private;
+ struct lbuf *nextbp, *tail;
+ struct jfs_log *log;
+ unsigned long flags;
+
+ if (bio->bi_size)
+ return 1;
+
+ /*
+ * get back jfs buffer bound to the i/o buffer
+ */
+ jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
+
+ LCACHE_LOCK(flags); /* disable+lock */
+
+ bp->l_flag |= lbmDONE;
+
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ bp->l_flag |= lbmERROR;
+
+ jfs_err("lbmIODone: I/O error in JFS log");
+ }
+
+ bio_put(bio);
+
+ /*
+ * pagein completion
+ */
+ if (bp->l_flag & lbmREAD) {
+ bp->l_flag &= ~lbmREAD;
+
+ LCACHE_UNLOCK(flags); /* unlock+enable */
+
+ /* wakeup I/O initiator */
+ LCACHE_WAKEUP(&bp->l_ioevent);
+
+ return 0;
+ }
+
+ /*
+ * pageout completion
+ *
+ * the bp at the head of write queue has completed pageout.
+ *
+ * if single-commit/full-page pageout, remove the current buffer
+ * from head of pageout queue, and redrive pageout with
+ * the new buffer at head of pageout queue;
+ * otherwise, the partial-page pageout buffer stays at
+ * the head of pageout queue to be redriven for pageout
+ * by lmGroupCommit() until full-page pageout is completed.
+ */
+ bp->l_flag &= ~lbmWRITE;
+ INCREMENT(lmStat.pagedone);
+
+ /* update committed lsn */
+ log = bp->l_log;
+ log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
+
+ if (bp->l_flag & lbmDIRECT) {
+ LCACHE_WAKEUP(&bp->l_ioevent);
+ LCACHE_UNLOCK(flags);
+ return 0;
+ }
+
+ tail = log->wqueue;
+
+ /* single element queue */
+ if (bp == tail) {
+ /* remove head buffer of full-page pageout
+ * from log device write queue
+ */
+ if (bp->l_flag & lbmRELEASE) {
+ log->wqueue = NULL;
+ bp->l_wqnext = NULL;
+ }
+ }
+ /* multi element queue */
+ else {
+ /* remove head buffer of full-page pageout
+ * from log device write queue
+ */
+ if (bp->l_flag & lbmRELEASE) {
+ nextbp = tail->l_wqnext = bp->l_wqnext;
+ bp->l_wqnext = NULL;
+
+ /*
+ * redrive pageout of next page at head of write queue:
+ * redrive next page without any bound tblk
+ * (i.e., page w/o any COMMIT records), or
+ * first page of new group commit which has been
+ * queued after current page (subsequent pageout
+ * is performed synchronously, except page without
+ * any COMMITs) by lmGroupCommit() as indicated
+ * by lbmWRITE flag;
+ */
+ if (nextbp->l_flag & lbmWRITE) {
+ /*
+ * We can't do the I/O at interrupt time.
+ * The jfsIO thread can do it
+ */
+ lbmRedrive(nextbp);
+ }
+ }
+ }
+
+ /*
+ * synchronous pageout:
+ *
+ * buffer has not necessarily been removed from write queue
+ * (e.g., synchronous write of partial-page with COMMIT):
+ * leave buffer for i/o initiator to dispose
+ */
+ if (bp->l_flag & lbmSYNC) {
+ LCACHE_UNLOCK(flags); /* unlock+enable */
+
+ /* wakeup I/O initiator */
+ LCACHE_WAKEUP(&bp->l_ioevent);
+ }
+
+ /*
+ * Group Commit pageout:
+ */
+ else if (bp->l_flag & lbmGC) {
+ LCACHE_UNLOCK(flags);
+ lmPostGC(bp);
+ }
+
+ /*
+ * asynchronous pageout:
+ *
+ * buffer must have been removed from write queue:
+ * insert buffer at head of freelist where it can be recycled
+ */
+ else {
+ assert(bp->l_flag & lbmRELEASE);
+ assert(bp->l_flag & lbmFREE);
+ lbmfree(bp);
+
+ LCACHE_UNLOCK(flags); /* unlock+enable */
+ }
+
+ return 0;
+}
+
+int jfsIOWait(void *arg)
+{
+ struct lbuf *bp;
+
+ daemonize("jfsIO");
+
+ complete(&jfsIOwait);
+
+ do {
+ DECLARE_WAITQUEUE(wq, current);
+
+ spin_lock_irq(&log_redrive_lock);
+ while ((bp = log_redrive_list) != NULL) {
+ log_redrive_list = bp->l_redrive_next;
+ bp->l_redrive_next = NULL;
+ spin_unlock_irq(&log_redrive_lock);
+ lbmStartIO(bp);
+ spin_lock_irq(&log_redrive_lock);
+ }
+ if (current->flags & PF_FREEZE) {
+ spin_unlock_irq(&log_redrive_lock);
+ refrigerator(PF_FREEZE);
+ } else {
+ add_wait_queue(&jfs_IO_thread_wait, &wq);
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock_irq(&log_redrive_lock);
+ schedule();
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&jfs_IO_thread_wait, &wq);
+ }
+ } while (!jfs_stop_threads);
+
+ jfs_info("jfsIOWait being killed!");
+ complete_and_exit(&jfsIOwait, 0);
+}
+
+/*
+ * NAME: lmLogFormat()/jfs_logform()
+ *
+ * FUNCTION: format file system log
+ *
+ * PARAMETERS:
+ * log - volume log
+ * logAddress - start address of log space in FS block
+ * logSize - length of log space in FS block;
+ *
+ * RETURN: 0 - success
+ * -EIO - i/o error
+ *
+ * XXX: We're synchronously writing one page at a time. This needs to
+ * be improved by writing multiple pages at once.
+ */
+int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
+{
+ int rc = -EIO;
+ struct jfs_sb_info *sbi;
+ struct logsuper *logsuper;
+ struct logpage *lp;
+ int lspn; /* log sequence page number */
+ struct lrd *lrd_ptr;
+ int npages = 0;
+ struct lbuf *bp;
+
+ jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
+ (long long)logAddress, logSize);
+
+ sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
+
+ /* allocate a log buffer */
+ bp = lbmAllocate(log, 1);
+
+ npages = logSize >> sbi->l2nbperpage;
+
+ /*
+ * log space:
+ *
+ * page 0 - reserved;
+ * page 1 - log superblock;
+ * page 2 - log data page: A SYNC log record is written
+ * into this page at logform time;
+ * pages 3-N - log data page: set to empty log data pages;
+ */
+ /*
+ * init log superblock: log page 1
+ */
+ logsuper = (struct logsuper *) bp->l_ldata;
+
+ logsuper->magic = cpu_to_le32(LOGMAGIC);
+ logsuper->version = cpu_to_le32(LOGVERSION);
+ logsuper->state = cpu_to_le32(LOGREDONE);
+ logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */
+ logsuper->size = cpu_to_le32(npages);
+ logsuper->bsize = cpu_to_le32(sbi->bsize);
+ logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
+ logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
+
+ bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
+ bp->l_blkno = logAddress + sbi->nbperpage;
+ lbmStartIO(bp);
+ if ((rc = lbmIOWait(bp, 0)))
+ goto exit;
+
+ /*
+ * init pages 2 to npages-1 as log data pages:
+ *
+ * log page sequence number (lpsn) initialization:
+ *
+ * pn: 0 1 2 3 n-1
+ * +-----+-----+=====+=====+===.....===+=====+
+ * lspn: N-1 0 1 N-2
+ * <--- N page circular file ---->
+ *
+ * the N (= npages-2) data pages of the log are maintained as
+ * a circular file for the log records;
+ * lpsn grows by 1 monotonically as each log page is written
+ * to the circular file of the log;
+ * and setLogpage() will not reset the page number even if
+ * the eor is equal to LOGPHDRSIZE. In order for the binary search
+ * to still work in the find-log-end process, we have to simulate
+ * the log wrap situation at log format time.
+ * The 1st log page written will have the highest lpsn. Then
+ * the succeeding log pages will have ascending order of
+ * the lspn starting from 0, ... (N-2)
+ */
+ lp = (struct logpage *) bp->l_ldata;
+ /*
+ * initialize 1st log page to be written: lpsn = N - 1,
+ * a SYNCPT log record is written to this page
+ */
+ lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
+ lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
+
+ lrd_ptr = (struct lrd *) &lp->data;
+ lrd_ptr->logtid = 0;
+ lrd_ptr->backchain = 0;
+ lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
+ lrd_ptr->length = 0;
+ lrd_ptr->log.syncpt.sync = 0;
+
+ bp->l_blkno += sbi->nbperpage;
+ bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
+ lbmStartIO(bp);
+ if ((rc = lbmIOWait(bp, 0)))
+ goto exit;
+
+ /*
+ * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
+ */
+ for (lspn = 0; lspn < npages - 3; lspn++) {
+ lp->h.page = lp->t.page = cpu_to_le32(lspn);
+ lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
+
+ bp->l_blkno += sbi->nbperpage;
+ bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
+ lbmStartIO(bp);
+ if ((rc = lbmIOWait(bp, 0)))
+ goto exit;
+ }
+
+ rc = 0;
+exit:
+ /*
+ * finalize log
+ */
+ /* release the buffer */
+ lbmFree(bp);
+
+ return rc;
+}
+
+#ifdef CONFIG_JFS_STATISTICS
+int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
+ int *eof, void *data)
+{
+ int len = 0;
+ off_t begin;
+
+ len += sprintf(buffer,
+ "JFS Logmgr stats\n"
+ "================\n"
+ "commits = %d\n"
+ "writes submitted = %d\n"
+ "writes completed = %d\n"
+ "full pages submitted = %d\n"
+ "partial pages submitted = %d\n",
+ lmStat.commit,
+ lmStat.submitted,
+ lmStat.pagedone,
+ lmStat.full_page,
+ lmStat.partial_page);
+
+ begin = offset;
+ *start = buffer + begin;
+ len -= begin;
+
+ if (len > length)
+ len = length;
+ else
+ *eof = 1;
+
+ if (len < 0)
+ len = 0;
+
+ return len;
+}
+#endif /* CONFIG_JFS_STATISTICS */