D36491.id112753.diff
No OneTemporary
Actions

Size

44 KB

Referenced Files

None

Subscribers

None

D36491.id112753.diff
View Options

	diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c
	--- a/sbin/fsck_ffs/dir.c
	+++ b/sbin/fsck_ffs/dir.c
	@@ -679,14 +679,17 @@
	struct bufarea *bp, *nbp;
	struct inodesc idesc;
	union dinode *dp;
	- int indiralloced;
	+ long cg, indiralloced;
	char *cp;

	nbp = NULL;
	indiralloced = newblk = indirblk = 0;
	+ memset(&idesc, 0, sizeof(struct inodesc));
	+ idesc.id_type = ADDR;
	pwarn("NO SPACE LEFT IN %s", name);
	if (!preen && reply("EXPAND") == 0)
	return (0);
	+ cg = ino_to_cg(&sblock, ip->i_number);
	dp = ip->i_dp;
	filesize = DIP(dp, di_size);
	lastlbn = lblkno(&sblock, filesize);
	@@ -705,7 +708,8 @@
	bp = getdirblk(oldblk, lastlbnsize);
	if (bp->b_errs)
	goto bad;
	- if ((newblk = allocblk(sblock.fs_frag)) == 0)
	+ newblk = allocblk(cg, sblock.fs_frag, std_checkblkavail);
	+ if (newblk == 0)
	goto bad;
	nbp = getdatablk(newblk, sblock.fs_bsize, BT_DIRDATA);
	if (nbp->b_errs)
	@@ -724,6 +728,7 @@
	memmove(cp, &emptydir, sizeof emptydir);
	dirty(nbp);
	brelse(nbp);
	+ binval(bp);
	idesc.id_blkno = oldblk;
	idesc.id_numfrags = numfrags(&sblock, lastlbnsize);
	(void)freeblock(&idesc);
	@@ -731,7 +736,7 @@
	printf(" (EXPANDED)\n");
	return (1);
	}
	- if ((newblk = allocblk(sblock.fs_frag)) == 0)
	+ if ((newblk = allocblk(cg, sblock.fs_frag, std_checkblkavail)) == 0)
	goto bad;
	bp = getdirblk(newblk, sblock.fs_bsize);
	if (bp->b_errs)
	@@ -749,8 +754,12 @@
	* Allocate indirect block if needed.
	*/
	if ((indirblk = DIP(dp, di_ib[0])) == 0) {
	- if ((indirblk = allocblk(sblock.fs_frag)) == 0)
	+ indirblk = allocblk(cg, sblock.fs_frag,
	+ std_checkblkavail);
	+ if (indirblk == 0) {
	+ binval(bp);
	goto bad;
	+ }
	indiralloced = 1;
	}
	nbp = getdatablk(indirblk, sblock.fs_bsize, BT_LEVEL1);
	@@ -774,8 +783,10 @@
	return (1);
	bad:
	pfatal(" (EXPANSION FAILED)\n");
	- if (nbp != NULL)
	+ if (nbp != NULL) {
	+ binval(bp);
	brelse(nbp);
	+ }
	if (newblk != 0) {
	idesc.id_blkno = newblk;
	idesc.id_numfrags = sblock.fs_frag;
	diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h
	--- a/sbin/fsck_ffs/fsck.h
	+++ b/sbin/fsck_ffs/fsck.h
	@@ -200,8 +200,7 @@
	#define BT_INODES 7 /* Buffer holds inodes */
	#define BT_DIRDATA 8 /* Buffer holds directory data */
	#define BT_DATA 9 /* Buffer holds user data */
	-#define BT_EMPTY 10 /* Buffer allocated but not filled */
	-#define BT_NUMBUFTYPES 11
	+#define BT_NUMBUFTYPES 10
	#define BT_NAMES { \
	"unknown", \
	"Superblock", \
	@@ -212,8 +211,7 @@
	"External Attribute", \
	"Inode Block", \
	"Directory Contents", \
	- "User Data", \
	- "Allocated but not filled" }
	+ "User Data" }
	extern char *buftype[];
	#define BT_BUFTYPE(type) \
	type < BT_NUMBUFTYPES ? buftype[type] : buftype[BT_UNKNOWN]
	@@ -234,7 +232,7 @@
	(bp)->b_flags \|= B_DIRTY; \
	} while (0)
	#define initbarea(bp, type) do { \
	- (bp)->b_bno = (ufs2_daddr_t)-1; \
	+ (bp)->b_bno = (ufs2_daddr_t)-4; \
	(bp)->b_size = 0; \
	(bp)->b_errs = 0; \
	(bp)->b_flags = 0; \
	@@ -347,6 +345,7 @@
	extern char *cdevname; /* name of device being checked */
	extern char ckclean; /* only do work if not cleanly unmounted */
	extern int ckhashadd; /* check hashes to be added */
	+extern char *copybuf; /* buffer to copy snapshot blocks */
	extern int cvtlevel; /* convert to newer file system format */
	extern long dev_bsize; /* computed value of DEV_BSIZE */
	extern u_int real_dev_bsize; /* actual disk sector size, not overridden */
	@@ -371,6 +370,8 @@
	extern int returntosingle; /* 1 => return to single user mode on exit */
	extern long secsize; /* actual disk sector size */
	extern char skipclean; /* skip clean file systems if preening */
	+extern int snapcnt; /* number of active snapshots */
	+extern struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */
	extern char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */
	extern int sujrecovery; /* 1 => doing check using the journal */
	extern int surrender; /* Give up if reads fail */
	@@ -441,9 +442,11 @@

	void adjust(struct inodesc *, int lcnt);
	void alarmhandler(int sig);
	-ufs2_daddr_t allocblk(long frags);
	+ufs2_daddr_t allocblk(long cg, long frags, ufs2_daddr_t (*checkblkavail)
	+ (ufs2_daddr_t blkno, long frags));
	ino_t allocdir(ino_t parent, ino_t request, int mode);
	ino_t allocino(ino_t request, int type);
	+void binval(struct bufarea *);
	void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk);
	char blockcheck(char name);
	int blread(int fd, char *buf, ufs2_daddr_t blk, long size);
	@@ -458,12 +461,15 @@
	void cgdirty(struct bufarea *);
	struct bufarea *cglookup(int cg);
	int changeino(ino_t dir, const char *name, ino_t newnum);
	+void check_blkcnt(struct inode *ip);
	int check_cgmagic(int cg, struct bufarea *cgbp, int requestrebuild);
	int chkrange(ufs2_daddr_t blk, int cnt);
	void ckfini(int markclean);
	int ckinode(union dinode dp, struct inodesc );
	void clri(struct inodesc , const char type, int flag);
	int clearentry(struct inodesc *);
	+void copyonwrite(struct fs *, struct bufarea *,
	+ ufs2_daddr_t (*checkblkavail)(long, long));
	void direrror(ino_t ino, const char *errmesg);
	int dirscan(struct inodesc *);
	int dofix(struct inodesc , const char msg);
	@@ -476,6 +482,7 @@
	int freeblock(struct inodesc *);
	void freeino(ino_t ino);
	void freeinodebuf(void);
	+void fsckinit(void);
	void fsutilinit(void);
	int ftypeok(union dinode *dp);
	void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size);
	@@ -484,6 +491,7 @@
	union dinode *getnextinode(ino_t inumber, int rebuildcg);
	void getpathname(char *namebuf, ino_t curdir, ino_t ino);
	void ginode(ino_t, struct inode *);
	+void gjournal_check(const char *filesys);
	void infohandler(int sig);
	void irelse(struct inode *);
	ufs2_daddr_t ino_blkatoff(union dinode , ino_t, ufs_lbn_t, int ,
	@@ -505,6 +513,7 @@
	void pass5(void);
	void pfatal(const char *fmt, ...) __printflike(1, 2);
	void propagate(void);
	+void prtbuf(struct bufarea *, const char *, ...) __printflike(2, 3);
	void prtinode(struct inode *);
	void pwarn(const char *fmt, ...) __printflike(1, 2);
	int readsb(void);
	@@ -513,9 +522,13 @@
	void sblock_init(void);
	void setinodebuf(int, ino_t);
	int setup(char *dev);
	-void gjournal_check(const char *filesys);
	+int snapblkfree(struct fs *, ufs2_daddr_t, long, ino_t,
	+ ufs2_daddr_t (*)(ufs2_daddr_t, long));
	+void snapremove(ino_t);
	+void snapflush(ufs2_daddr_t (*checkblkavail)(long, long));
	+ufs2_daddr_t std_checkblkavail(ufs2_daddr_t blkno, long frags);
	+ufs2_daddr_t suj_checkblkavail(ufs2_daddr_t, long);
	int suj_check(const char *filesys);
	void update_maps(struct cg , struct cg, int);
	-void fsckinit(void);

	#endif /* !_FSCK_H_ */
	diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c
	--- a/sbin/fsck_ffs/fsutil.c
	+++ b/sbin/fsck_ffs/fsutil.c
	@@ -71,7 +71,6 @@
	static void slowio_start(void);
	static void slowio_end(void);
	static void printIOstats(void);
	-static void prtbuf(const char , struct bufarea );

	static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
	static struct timespec startpass, finishpass;
	@@ -79,8 +78,10 @@
	int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */
	int slowio_pollcnt;
	static struct bufarea cgblk; /* backup buffer for cylinder group blocks */
	+static struct bufarea failedbuf; /* returned by failed getdatablk() */
	static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
	static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
	+static struct bufhash freebufs; /* unused buffers */
	static int numbufs; /* size of buffer cache */
	static int cachelookups; /* number of cache lookups */
	static int cachereads; /* number of cache reads */
	@@ -187,11 +188,15 @@
	{
	int i;

	+ initbarea(&failedbuf, BT_UNKNOWN);
	+ failedbuf.b_errs = -1;
	+ failedbuf.b_un.b_buf = NULL;
	if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL)
	errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
	initbarea(&cgblk, BT_CYLGRP);
	numbufs = cachelookups = cachereads = 0;
	TAILQ_INIT(&bufqueuehd);
	+ LIST_INIT(&freebufs);
	for (i = 0; i < HASHSIZE; i++)
	LIST_INIT(&bufhashhd[i]);
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
	@@ -300,7 +305,7 @@
	}

	/*
	- * Manage a cache of directory blocks.
	+ * Manage a cache of filesystem disk blocks.
	*/
	struct bufarea *
	getdatablk(ufs2_daddr_t blkno, long size, int type)
	@@ -309,19 +314,23 @@
	struct bufhash *bhdp;

	cachelookups++;
	- /* If out of range, return empty buffer with b_err == -1 */
	- if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
	- blkno = -1;
	- type = BT_EMPTY;
	- }
	+ /*
	+ * If out of range, return empty buffer with b_err == -1
	+ *
	+ * Skip check for inodes because chkrange() considers
	+ * metadata areas invalid to write data.
	+ */
	+ if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize))
	+ return (&failedbuf);
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
	if (bp->b_bno == fsbtodb(&sblock, blkno)) {
	if (debug && bp->b_size != size) {
	- prtbuf("getdatablk: size mismatch", bp);
	+ prtbuf(bp, "getdatablk: size mismatch");
	pfatal("getdatablk: b_size %d != size %ld\n",
	bp->b_size, size);
	}
	+ TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	goto foundit;
	}
	/*
	@@ -340,7 +349,9 @@
	if (size > sblock.fs_bsize)
	errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
	sblock.fs_bsize);
	- if (numbufs < MINBUFS) {
	+ if ((bp = LIST_FIRST(&freebufs)) != NULL) {
	+ LIST_REMOVE(bp, b_hash);
	+ } else if (numbufs < MINBUFS) {
	bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
	/*
	@@ -368,6 +379,7 @@
	else
	LIST_REMOVE(bp, b_hash);
	}
	+ TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	flush(fswritefd, bp);
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	@@ -375,13 +387,12 @@
	cachereads++;
	/* fall through */
	foundit:
	+ TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
	printf("getdatablk: buffer type changed to %s",
	BT_BUFTYPE(type));
	- prtbuf("", bp);
	+ prtbuf(bp, "");
	}
	- TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	- TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (bp->b_errs == 0)
	bp->b_refcnt++;
	return (bp);
	@@ -401,11 +412,7 @@
	readcnt[bp->b_type]++;
	clock_gettime(CLOCK_REALTIME_PRECISE, &start);
	}
	- if (bp->b_type != BT_EMPTY)
	- bp->b_errs =
	- blread(fsreadfd, bp->b_un.b_buf, dblk, size);
	- else
	- bp->b_errs = -1;
	+ bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
	if (debug) {
	clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
	timespecsub(&finish, &start, &finish);
	@@ -422,10 +429,19 @@
	{

	if (bp->b_refcnt <= 0)
	- prtbuf("brelse: buffer with negative reference count", bp);
	+ prtbuf(bp, "brelse: buffer with negative reference count");
	bp->b_refcnt--;
	}

	+/*
	+ * Invalidate a buffer: clear its B_DIRTY flag, unlink it from its
	+ * current hash chain and put it at the head of the freebufs list.
	+ * The buffer's position on bufqueuehd and its reference count are
	+ * not touched here.
	+ */
	+void
	+binval(struct bufarea *bp)
	+{
	+
	+ bp->b_flags &= ~B_DIRTY;
	+ LIST_REMOVE(bp, b_hash);
	+ LIST_INSERT_HEAD(&freebufs, bp, b_hash);
	+}
	+
	void
	flush(int fd, struct bufarea *bp)
	{
	@@ -451,10 +467,18 @@
	if (bp != &sblk)
	pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
	bp, &sblk);
	+ /*
	+ * Superblocks are always pre-copied so we do not need
	+ * to check them for copy-on-write.
	+ */
	if (sbput(fd, bp->b_un.b_fs, 0) == 0)
	fsmodified = 1;
	break;
	case BT_CYLGRP:
	+ /*
	+ * Cylinder groups are always pre-copied so we do not
	+ * need to check them for copy-on-write.
	+ */
	if (sujrecovery)
	cg_write(bp);
	if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
	@@ -483,11 +507,38 @@
	}
	/* FALLTHROUGH */
	default:
	+ copyonwrite(&sblock, bp, std_checkblkavail);
	blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
	break;
	}
	}

	+/*
	+ * If there are any snapshots, ensure that all the blocks that they
	+ * care about have been copied, then release the snapshot inodes.
	+ * These operations need to be done before we rebuild the cylinder
	+ * groups so that any block allocations are properly recorded.
	+ * Since all the cylinder group maps have already been copied in
	+ * the snapshots, no further snapshot copies will need to be done.
	+ */
	+void
	+snapflush(ufs2_daddr_t (*checkblkavail)(long, long))
	+{
	+ struct bufarea *bp;
	+ int cnt;
	+
	+ /* Without active snapshots there is nothing to copy or release. */
	+ if (snapcnt <= 0)
	+ return;
	+ if (debug)
	+ printf("Check for snapshot copies\n");
	+ /* Walk the buffer queue from its tail, copying each dirty buffer. */
	+ TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) {
	+ if ((bp->b_flags & B_DIRTY) == 0)
	+ continue;
	+ copyonwrite(&sblock, bp, checkblkavail);
	+ }
	+ /* Drop the snapshot inodes and mark the list empty. */
	+ for (cnt = 0; cnt < snapcnt; cnt++)
	+ irelse(&snaplist[cnt]);
	+ snapcnt = 0;
	+}
	+
	/*
	* Journaled soft updates does not maintain cylinder group summary
	* information during cleanup, so this routine recalculates the summary
	@@ -499,6 +550,7 @@
	{
	ufs1_daddr_t fragno, cgbno, maxbno;
	u_int8_t *blksfree;
	+ struct csum *csp;
	struct cg *cgp;
	int blk;
	int i;
	@@ -536,6 +588,11 @@
	* Update the superblock cg summary from our now correct values
	* before writing the block.
	*/
	+ csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
	+ sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
	+ sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
	+ sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
	+ sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
	sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
	}

	@@ -587,6 +644,7 @@
	(void)close(fsreadfd);
	return;
	}
	+
	/*
	* To remain idempotent with partial truncations the buffers
	* must be flushed in this order:
	@@ -629,14 +687,9 @@
	case BT_SUPERBLK:
	case BT_CYLGRP:
	default:
	- prtbuf("ckfini: improper buffer type on cache list",bp);
	+ prtbuf(bp,"ckfini: improper buffer type on cache list");
	continue;
	/* These are the ones to flush in this step */
	- case BT_EMPTY:
	- if (bp->b_bno >= 0)
	- pfatal("Unused BT_EMPTY buffer for block %jd\n",
	- (intmax_t)bp->b_bno);
	- /* FALLTHROUGH */
	case BT_LEVEL1:
	case BT_LEVEL2:
	case BT_LEVEL3:
	@@ -648,11 +701,10 @@
	case BT_INODES:
	continue;
	}
	- if (debug && bp->b_refcnt != 0) {
	- prtbuf("ckfini: clearing in-use buffer", bp);
	- pfatal("ckfini: clearing in-use buffer\n");
	- }
	+ if (debug && bp->b_refcnt != 0)
	+ prtbuf(bp, "ckfini: clearing in-use buffer");
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	+ LIST_REMOVE(bp, b_hash);
	cnt++;
	flush(fswritefd, bp);
	free(bp->b_un.b_buf);
	@@ -666,11 +718,10 @@
	icachebp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
	- if (debug && bp->b_refcnt != 0) {
	- prtbuf("ckfini: clearing in-use buffer", bp);
	- pfatal("ckfini: clearing in-use buffer\n");
	- }
	+ if (debug && bp->b_refcnt != 0)
	+ prtbuf(bp, "ckfini: clearing in-use buffer");
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	+ LIST_REMOVE(bp, b_hash);
	cnt++;
	flush(fswritefd, bp);
	free(bp->b_un.b_buf);
	@@ -1050,45 +1101,77 @@
	* allocate a data block with the specified number of fragments
	*/
	ufs2_daddr_t
	-allocblk(long frags)
	+allocblk(long startcg, long frags,
	+ ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
	{
	- int i, j, k, cg, baseblk;
	- struct bufarea *cgbp;
	- struct cg *cgp;
	+ ufs2_daddr_t blkno, newblk;

	+ if (sujrecovery && checkblkavail == std_checkblkavail) {
	+ pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
	+ return (0);
	+ }
	if (frags <= 0 \|\| frags > sblock.fs_frag)
	return (0);
	- for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) {
	- for (j = 0; j <= sblock.fs_frag - frags; j++) {
	- if (testbmap(i + j))
	- continue;
	- for (k = 1; k < frags; k++)
	- if (testbmap(i + j + k))
	- break;
	- if (k < frags) {
	- j += k;
	- continue;
	- }
	- cg = dtog(&sblock, i + j);
	- cgbp = cglookup(cg);
	- cgp = cgbp->b_un.b_cg;
	- if (!check_cgmagic(cg, cgbp, 0)) {
	- i = (cg + 1) * sblock.fs_fpg - sblock.fs_frag;
	- continue;
	- }
	- baseblk = dtogd(&sblock, i + j);
	- for (k = 0; k < frags; k++) {
	- setbmap(i + j + k);
	- clrbit(cg_blksfree(cgp), baseblk + k);
	- }
	- n_blks += frags;
	- if (frags == sblock.fs_frag)
	- cgp->cg_cs.cs_nbfree--;
	- else
	- cgp->cg_cs.cs_nffree -= frags;
	- cgdirty(cgbp);
	- return (i + j);
	+ for (blkno = cgdata(&sblock, startcg);
	+ blkno < maxfsblock - sblock.fs_frag;
	+ blkno += sblock.fs_frag) {
	+ if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
	+ continue;
	+ if (newblk > 0)
	+ return (newblk);
	+ if (newblk < 0)
	+ blkno = -newblk;
	+ }
	+ for (blkno = cgdata(&sblock, 0);
	+ blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
	+ blkno += sblock.fs_frag) {
	+ if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
	+ continue;
	+ if (newblk > 0)
	+ return (newblk);
	+ if (newblk < 0)
	+ blkno = -newblk;
	+ }
	+ return (0);
	+}
	+
	+/*
	+ * Block-availability check handed to allocblk() as its callback.
	+ *
	+ * Looks inside the block starting at blkno for a run of frags fragments
	+ * that are all clear in the block map (testbmap). When a run is found
	+ * the fragments are set in the block map, cleared in the cylinder
	+ * group's cg_blksfree map, the counters are adjusted, the cylinder group
	+ * is marked dirty and the number of the first fragment is returned.
	+ *
	+ * Returns 0 when the block holds no suitable run. Returns a negative
	+ * value when the cylinder group fails check_cgmagic(); allocblk()
	+ * negates that value and continues its scan from there.
	+ */
	+ufs2_daddr_t
	+std_checkblkavail(blkno, frags)
	+ ufs2_daddr_t blkno;
	+ long frags;
	+{
	+ struct bufarea *cgbp;
	+ struct cg *cgp;
	+ ufs2_daddr_t j, k, baseblk;
	+ long cg;
	+
	+ /* Try each starting offset that leaves room for frags fragments. */
	+ for (j = 0; j <= sblock.fs_frag - frags; j++) {
	+ if (testbmap(blkno + j))
	+ continue;
	+ for (k = 1; k < frags; k++)
	+ if (testbmap(blkno + j + k))
	+ break;
	+ if (k < frags) {
	+ /* Fragment j + k is in use; skip past it. */
	+ j += k;
	+ continue;
	}
	+ cg = dtog(&sblock, blkno + j);
	+ cgbp = cglookup(cg);
	+ cgp = cgbp->b_un.b_cg;
	+ /* Bad cylinder group: tell the caller where to resume, negated. */
	+ if (!check_cgmagic(cg, cgbp, 0))
	+ return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
	+ baseblk = dtogd(&sblock, blkno + j);
	+ for (k = 0; k < frags; k++) {
	+ setbmap(blkno + j + k);
	+ clrbit(cg_blksfree(cgp), baseblk + k);
	+ }
	+ n_blks += frags;
	+ /* A full block comes out of nbfree, a partial one out of nffree. */
	+ if (frags == sblock.fs_frag)
	+ cgp->cg_cs.cs_nbfree--;
	+ else
	+ cgp->cg_cs.cs_nffree -= frags;
	+ cgdirty(cgbp);
	+ return (blkno + j);
	}
	return (0);
	}
	@@ -1261,14 +1344,19 @@
	/*
	* Print details about a buffer.
	*/
	-static void
	-prtbuf(const char msg, struct bufarea bp)
	+void
	+prtbuf(struct bufarea *bp, const char *fmt, ...)
	{
	-
	- printf("%s: bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
	- "index %jd\n", msg, bp, BT_BUFTYPE(bp->b_type),
	- (intmax_t) bp->b_bno, bp->b_size, bp->b_refcnt,
	- bp->b_flags & B_DIRTY ? "dirty" : "clean", (intmax_t) bp->b_index);
	+ va_list ap;
	+ va_start(ap, fmt);
	+ if (preen)
	+ (void)fprintf(stdout, "%s: ", cdevname);
	+ (void)vfprintf(stdout, fmt, ap);
	+ va_end(ap);
	+ printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
	+ "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
	+ bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
	+ (intmax_t) bp->b_index);
	}

	/*
	diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c
	--- a/sbin/fsck_ffs/inode.c
	+++ b/sbin/fsck_ffs/inode.c
	@@ -38,6 +38,7 @@
	__FBSDID("$FreeBSD$");

	#include <sys/param.h>
	+#include <sys/stat.h>
	#include <sys/stdint.h>
	#include <sys/sysctl.h>

	@@ -58,6 +59,9 @@
	static int iblock(struct inodesc *, off_t isize, int type);
	static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
	struct bufarea **);
	+static int snapclean(struct inodesc *idesc);
	+static void chkcopyonwrite(struct fs *, ufs2_daddr_t,
	+ ufs2_daddr_t (*checkblkavail)(long, long));

	int
	ckinode(union dinode *dp, struct inodesc *idesc)
	@@ -378,8 +382,12 @@
	int c;

	if (cnt <= 0 \|\| blk <= 0 \|\| blk > maxfsblock \|\|
	- cnt - 1 > maxfsblock - blk)
	+ cnt - 1 > maxfsblock - blk) {
	+ if (debug)
	+ printf("out of range: blk %ld, offset %i, size %d\n",
	+ (long)blk, (int)fragnum(&sblock, blk), cnt);
	return (1);
	+ }
	if (cnt > sblock.fs_frag \|\|
	fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
	if (debug)
	@@ -650,11 +658,21 @@
	freeblock(struct inodesc *idesc)
	{
	struct dups *dlp;
	+ struct bufarea *cgbp;
	+ struct cg *cgp;
	ufs2_daddr_t blkno;
	- long nfrags, res;
	+ long size, nfrags, res;

	res = KEEPON;
	blkno = idesc->id_blkno;
	+ if (idesc->id_type == SNAP) {
	+ pfatal("clearing a snapshot dinode\n");
	+ return (STOP);
	+ }
	+ size = lfragtosize(&sblock, idesc->id_numfrags);
	+ if (snapblkfree(&sblock, blkno, size, idesc->id_number,
	+ std_checkblkavail))
	+ return (res);
	for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
	if (chkrange(blkno, 1)) {
	res = SKIP;
	@@ -674,12 +692,407 @@
	}
	}
	}
	+ /*
	+ * If all successfully returned, account for them.
	+ */
	+ if (nfrags == 0) {
	+ cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
	+ cgp = cgbp->b_un.b_cg;
	+ if (idesc->id_numfrags == sblock.fs_frag)
	+ cgp->cg_cs.cs_nbfree++;
	+ else
	+ cgp->cg_cs.cs_nffree += idesc->id_numfrags;
	+ cgdirty(cgbp);
	+ }
	return (res);
	}

	+/*
	+ * Prepare a snapshot file for being removed.
	+ *
	+ * The inode is taken from snaplist when it is an active snapshot and is
	+ * fetched with ginode() otherwise. An inode without SF_SNAPSHOT set is
	+ * reported and left alone. An active snapshot is unlinked from both
	+ * snaplist and sblock.fs_snapinum. The inode's block pointers are then
	+ * walked with snapclean() and its SF_SNAPSHOT flag is cleared.
	+ */
	+void
	+snapremove(ino_t inum)
	+{
	+ struct inodesc idesc;
	+ struct inode ip;
	+ int i;
	+
	+ for (i = 0; i < snapcnt; i++)
	+ if (snaplist[i].i_number == inum)
	+ break;
	+ if (i == snapcnt)
	+ ginode(inum, &ip);
	+ else
	+ ip = snaplist[i];
	+ if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
	+ printf("snapremove: inode %jd is not a snapshot\n",
	+ (intmax_t)inum);
	+ /* Only an inode obtained through ginode() above is released. */
	+ if (i == snapcnt)
	+ irelse(&ip);
	+ return;
	+ }
	+ if (debug)
	+ printf("snapremove: remove %sactive snapshot %jd\n",
	+ i == snapcnt ? "in" : "", (intmax_t)inum);
	+ /*
	+ * If on active snapshot list, remove it.
	+ */
	+ if (i < snapcnt) {
	+ for (i++; i < FSMAXSNAP; i++) {
	+ if (sblock.fs_snapinum[i] == 0)
	+ break;
	+ snaplist[i - 1] = snaplist[i];
	+ sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
	+ }
	+ sblock.fs_snapinum[i - 1] = 0;
	+ bzero(&snaplist[i - 1], sizeof(struct inode));
	+ snapcnt--;
	+ }
	+ /*
	+ * Start from a zeroed descriptor so that no field reaches ckinode()
	+ * or snapclean() with an indeterminate value.
	+ */
	+ memset(&idesc, 0, sizeof(struct inodesc));
	+ idesc.id_type = SNAP;
	+ idesc.id_func = snapclean;
	+ idesc.id_number = inum;
	+ (void)ckinode(ip.i_dp, &idesc);
	+ DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
	+ inodirty(&ip);
	+ irelse(&ip);
	+}
	+
	+/*
	+ * Per-block callback installed by snapremove(). Block pointers equal to
	+ * BLK_NOCOPY or BLK_SNAP are reset to zero; every other pointer is left
	+ * alone. Always returns KEEPON.
	+ */
	+static int
	+snapclean(struct inodesc *idesc)
	+{
	+ ufs2_daddr_t blkno;
	+ struct bufarea *bp;
	+ union dinode *dp;
	+
	+ blkno = idesc->id_blkno;
	+ if (blkno == 0)
	+ return (KEEPON);
	+
	+ dp = idesc->id_dp;
	+ if (blkno == BLK_NOCOPY \|\| blkno == BLK_SNAP) {
	+ if (idesc->id_lbn < UFS_NDADDR) {
	+ /*
	+ * Direct pointers live in the dinode, which snapremove()
	+ * marks dirty with inodirty(); no buffer is involved.
	+ */
	+ DIP_SET(dp, di_db[idesc->id_lbn], 0);
	+ } else {
	+ /* Indirect pointers live in the buffer that was walked. */
	+ bp = idesc->id_bp;
	+ IBLK_SET(bp, bp->b_index, 0);
	+ dirty(bp);
	+ }
	+ }
	+ return (KEEPON);
	+}
	+
	+/*
	+ * Notification that a block is being freed. Return zero if the free
	+ * should be allowed to proceed. Return non-zero if the snapshot file
	+ * wants to claim the block. The block will be claimed if it is an
	+ * uncopied part of one of the snapshots. It will be freed if it is
	+ * either a BLK_NOCOPY or has already been copied in all of the snapshots.
	+ * If a fragment is being freed, then all snapshots that care about
	+ * it must make a copy since a snapshot file can only claim full sized
	+ * blocks. Note that if more than one snapshot file maps the block,
	+ * we can pick one at random to claim it. Since none of the snapshots
	+ * can change, we are assured that they will all see the same unmodified
	+ * image. When deleting a snapshot file (see ino_trunc above), we
	+ * must push any of these claimed blocks to one of the other snapshots
	+ * that maps it. These claimed blocks are easily identified as they will
	+ * have a block number equal to their logical block number within the
	+ * snapshot. A copied block can never have this property because they
	+ * must always have been allocated from a BLK_NOCOPY location.
	+ *
	+ * fs is the filesystem, bno the first fragment being freed, size the
	+ * number of bytes being freed and inum the inode giving up the block.
	+ * checkblkavail is passed through to allocblk() when a copy is needed.
	+ */
	+int
	+snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
	+ ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
	+{
	+ union dinode *dp;
	+ struct inode ip;
	+ struct bufarea *snapbp;
	+ ufs_lbn_t lbn;
	+ ufs2_daddr_t blkno, relblkno;
	+ int i, frags, claimedblk, copydone;
	+
	+ /* If no snapshots, nothing to do */
	+ if (snapcnt == 0)
	+ return (0);
	+ /* Cast to the widest types so the conversions match on every ABI. */
	+ if (debug)
	+ printf("snapblkfree: in ino %ju free blkno %jd, size %ld\n",
	+ (uintmax_t)inum, (intmax_t)bno, size);
	+ relblkno = blknum(fs, bno);
	+ lbn = fragstoblks(fs, relblkno);
	+ /* Direct blocks are always pre-copied */
	+ if (lbn < UFS_NDADDR)
	+ return (0);
	+ copydone = 0;
	+ claimedblk = 0;
	+ for (i = 0; i < snapcnt; i++) {
	+ /*
	+ * Lookup block being freed.
	+ */
	+ ip = snaplist[i];
	+ dp = ip.i_dp;
	+ blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
	+ lbn, &frags, &snapbp);
	+ /*
	+ * Check to see if block needs to be copied.
	+ */
	+ if (blkno == 0) {
	+ /*
	+ * A block that we map is being freed. If it has not
	+ * been claimed yet, we will claim or copy it (below).
	+ */
	+ claimedblk = 1;
	+ } else if (blkno == BLK_SNAP) {
	+ /*
	+ * No previous snapshot claimed the block,
	+ * so it will be freed and become a BLK_NOCOPY
	+ * (don't care) for us.
	+ */
	+ if (claimedblk)
	+ pfatal("snapblkfree: inconsistent block type");
	+ IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
	+ dirty(snapbp);
	+ brelse(snapbp);
	+ continue;
	+ } else /* BLK_NOCOPY or default */ {
	+ /*
	+ * If the snapshot has already copied the block
	+ * (default), or does not care about the block,
	+ * it is not needed.
	+ */
	+ brelse(snapbp);
	+ continue;
	+ }
	+ /*
	+ * If this is a full size block, we will just grab it
	+ * and assign it to the snapshot inode. Otherwise we
	+ * will proceed to copy it. See explanation for this
	+ * routine as to why only a single snapshot needs to
	+ * claim this block.
	+ */
	+ if (size == fs->fs_bsize) {
	+ if (debug)
	+ printf("Grabonremove snapshot %ju lbn %jd "
	+ "from inum %ju\n", (uintmax_t)ip.i_number,
	+ (intmax_t)lbn, (uintmax_t)inum);
	+ IBLK_SET(snapbp, snapbp->b_index, relblkno);
	+ dirty(snapbp);
	+ brelse(snapbp);
	+ DIP_SET(dp, di_blocks,
	+ DIP(dp, di_blocks) + btodb(size));
	+ inodirty(&ip);
	+ return (1);
	+ }
	+
	+ /* First time through, read the contents of the old block. */
	+ if (copydone == 0) {
	+ copydone = 1;
	+ if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
	+ fs->fs_bsize) != 0) {
	+ pfatal("Could not read snapshot %ju block "
	+ "%jd\n", (uintmax_t)ip.i_number,
	+ (intmax_t)relblkno);
	+ /* Release the buffer as every other exit does. */
	+ brelse(snapbp);
	+ continue;
	+ }
	+ }
	+ /*
	+ * This allocation will never require any additional
	+ * allocations for the snapshot inode.
	+ */
	+ blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
	+ checkblkavail);
	+ if (blkno == 0) {
	+ pfatal("Could not allocate block for snapshot %ju\n",
	+ (uintmax_t)ip.i_number);
	+ /* Release the buffer as every other exit does. */
	+ brelse(snapbp);
	+ continue;
	+ }
	+ if (debug)
	+ printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
	+ "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
	+ (intmax_t)lbn, (uintmax_t)inum, size,
	+ (intmax_t)blkno);
	+ blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
	+ IBLK_SET(snapbp, snapbp->b_index, blkno);
	+ dirty(snapbp);
	+ brelse(snapbp);
	+ DIP_SET(dp, di_blocks,
	+ DIP(dp, di_blocks) + btodb(fs->fs_bsize));
	+ inodirty(&ip);
	+ }
	+ return (0);
	+}
	+
	+/*
	+ * Notification that a block is being written. Return if the block
	+ * is part of a snapshot as snapshots never track other snapshots.
	+ * The block will be copied in all of the snapshots that are tracking
	+ * it and have not yet copied it. Some buffers may hold more than one
	+ * block. Here we need to check each block in the buffer.
	+ *
	+ * NOTE(review): no test for "block is part of a snapshot" is visible in
	+ * this function or in chkcopyonwrite(); confirm where it is enforced.
	+ * NOTE(review): checkblkavail is declared here as (long, long) while
	+ * allocblk() and std_checkblkavail() use (ufs2_daddr_t, long); the two
	+ * agree only where ufs2_daddr_t and long are the same type.
	+ */
	+void
	+copyonwrite(fs, bp, checkblkavail)
	+ struct fs *fs;
	+ struct bufarea *bp;
	+ ufs2_daddr_t (*checkblkavail)(long cg, long frags);
	+{
	+ ufs2_daddr_t copyblkno;
	+ long i, numblks;
	+
	+ /* If no snapshots, nothing to do. */
	+ if (snapcnt == 0)
	+ return;
	+ numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
	+ /* %jd takes an intmax_t, so the long count must be cast. */
	+ if (debug)
	+ prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
	+ (intmax_t)numblks, numblks > 1 ? "s" : "");
	+ /* Start at the block containing the buffer's first fragment. */
	+ copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
	+ for (i = 0; i < numblks; i++) {
	+ chkcopyonwrite(fs, copyblkno, checkblkavail);
	+ copyblkno += fs->fs_frag;
	+ }
	+}
	+
	+/*
	+ * Copy one filesystem block into every active snapshot that still maps
	+ * it as uncopied (block pointer 0). copyblkno is the first fragment of
	+ * the block about to be overwritten; checkblkavail is passed through
	+ * to allocblk() to obtain the block that receives the copy.
	+ */
	+static void
	+chkcopyonwrite(fs, copyblkno, checkblkavail)
	+ struct fs *fs;
	+ ufs2_daddr_t copyblkno;
	+ ufs2_daddr_t (*checkblkavail)(long cg, long frags);
	+{
	+ struct inode ip;
	+ union dinode *dp;
	+ struct bufarea *snapbp;
	+ ufs2_daddr_t blkno;
	+ int i, frags, copydone;
	+ ufs_lbn_t lbn;
	+
	+ lbn = fragstoblks(fs, copyblkno);
	+ /* Direct blocks are always pre-copied */
	+ if (lbn < UFS_NDADDR)
	+ return;
	+ copydone = 0;
	+ for (i = 0; i < snapcnt; i++) {
	+ /*
	+ * Lookup block being written.
	+ */
	+ ip = snaplist[i];
	+ dp = ip.i_dp;
	+ blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
	+ /*
	+ * Check to see if block needs to be copied.
	+ */
	+ if (blkno != 0) {
	+ /*
	+ * A block that we have already copied or don't track.
	+ */
	+ brelse(snapbp);
	+ continue;
	+ }
	+ /* First time through, read the contents of the old block. */
	+ if (copydone == 0) {
	+ copydone = 1;
	+ if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
	+ fs->fs_bsize) != 0) {
	+ pfatal("Could not read snapshot %ju block "
	+ "%jd\n", (uintmax_t)ip.i_number,
	+ (intmax_t)copyblkno);
	+ /* Release the buffer as every other exit does. */
	+ brelse(snapbp);
	+ continue;
	+ }
	+ }
	+ /*
	+ * This allocation will never require any additional
	+ * allocations for the snapshot inode.
	+ */
	+ if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
	+ checkblkavail)) == 0) {
	+ pfatal("Could not allocate block for snapshot %ju\n",
	+ (uintmax_t)ip.i_number);
	+ /* Release the buffer as every other exit does. */
	+ brelse(snapbp);
	+ continue;
	+ }
	+ if (debug)
	+ prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
	+ "blkno %ju setting in buffer",
	+ (intmax_t)ip.i_number, (intmax_t)lbn,
	+ (uintmax_t)blkno);
	+ blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
	+ IBLK_SET(snapbp, snapbp->b_index, blkno);
	+ dirty(snapbp);
	+ brelse(snapbp);
	+ DIP_SET(dp, di_blocks,
	+ DIP(dp, di_blocks) + btodb(fs->fs_bsize));
	+ inodirty(&ip);
	+ }
	+ return;
	+}
	+
	+/*
	+ * Traverse an inode and check that its block count is correct
	+ * fixing it if necessary.
	+ *
	+ * The inode is walked with ckinode() using pass1check as the per-block
	+ * callback, and on UFS2 the callback is also applied to each non-zero
	+ * external attribute block. The resulting idesc.id_entryno is compared
	+ * with di_blocks. A mismatch is reported (unless both sujrecovery and
	+ * preen are set) and then repaired either directly in the dinode or,
	+ * when bkgrdflag is set, through the adjblkcnt sysctl.
	+ */
	+void
	+check_blkcnt(struct inode *ip)
	+{
	+ struct inodesc idesc;
	+ union dinode *dp;
	+ ufs2_daddr_t ndb;
	+ int j, ret, offset;
	+
	+ dp = ip->i_dp;
	+ memset(&idesc, 0, sizeof(struct inodesc));
	+ idesc.id_func = pass1check;
	+ idesc.id_number = ip->i_number;
	+ /* Snapshot inodes are walked as SNAP, everything else as ADDR. */
	+ idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP;
	+ (void)ckinode(dp, &idesc);
	+ if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
	+ ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
	+ for (j = 0; j < UFS_NXADDR; j++) {
	+ /* The last block may be partial; size it in fragments. */
	+ if (--ndb == 0 &&
	+ (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0)
	+ idesc.id_numfrags = numfrags(&sblock,
	+ fragroundup(&sblock, offset));
	+ else
	+ idesc.id_numfrags = sblock.fs_frag;
	+ if (dp->dp2.di_extb[j] == 0)
	+ continue;
	+ idesc.id_blkno = dp->dp2.di_extb[j];
	+ ret = (*idesc.id_func)(&idesc);
	+ if (ret & STOP)
	+ break;
	+ }
	+ }
	+ /* presumably id_entryno counts fragments here — TODO confirm */
	+ idesc.id_entryno *= btodb(sblock.fs_fsize);
	+ if (DIP(dp, di_blocks) != idesc.id_entryno) {
	+ if (!(sujrecovery && preen)) {
	+ pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
	+ (u_long)idesc.id_number,
	+ (uintmax_t)DIP(dp, di_blocks),
	+ (uintmax_t)idesc.id_entryno);
	+ if (preen)
	+ printf(" (CORRECTED)\n");
	+ else if (reply("CORRECT") == 0)
	+ return;
	+ }
	+ if (bkgrdflag == 0) {
	+ DIP_SET(dp, di_blocks, idesc.id_entryno);
	+ inodirty(ip);
	+ } else {
	+ /* Background mode: pass the difference to the sysctl. */
	+ cmd.value = idesc.id_number;
	+ cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
	+ if (debug)
	+ printf("adjblkcnt ino %ju amount %lld\n",
	+ (uintmax_t)cmd.value, (long long)cmd.size);
	+ if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
	+ &cmd, sizeof cmd) == -1)
	+ rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
	+ }
	+ }
	+}
	+
	void
	freeinodebuf(void)
	{
	+ struct bufarea *bp;
	+ int i;

	/*
	* Flush old contents in case they have been updated.
	@@ -689,6 +1102,14 @@
	free((char *)inobuf.b_un.b_buf);
	inobuf.b_un.b_buf = NULL;
	firstinum = lastinum = 0;
	+ /*
	+ * Reload the snapshot inodes in case any of them changed.
	+ */
	+ for (i = 0; i < snapcnt; i++) {
	+ bp = snaplist[i].i_bp;
	+ bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
	+ bp->b_size);
	+ }
	}

	/*
	@@ -720,6 +1141,7 @@
	inpp = &inphead[inumber % dirhash];
	inp->i_nexthash = *inpp;
	*inpp = inp;
	+ inp->i_flags = 0;
	inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
	inp->i_dotdot = (ino_t)0;
	inp->i_number = inumber;
	@@ -803,6 +1225,10 @@
	printf(" (CLEARED)\n");
	n_files--;
	if (bkgrdflag == 0) {
	+ if (idesc->id_type == SNAP) {
	+ snapremove(idesc->id_number);
	+ idesc->id_type = ADDR;
	+ }
	(void)ckinode(dp, idesc);
	inoinfo(idesc->id_number)->ino_state = USTATE;
	clearinode(dp);
	@@ -967,7 +1393,8 @@
	cgdirty(cgbp);
	ginode(ino, &ip);
	dp = ip.i_dp;
	- DIP_SET(dp, di_db[0], allocblk((long)1));
	+ DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
	+ std_checkblkavail));
	if (DIP(dp, di_db[0]) == 0) {
	inoinfo(ino)->ino_state = USTATE;
	irelse(&ip);
	diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c
	--- a/sbin/fsck_ffs/main.c
	+++ b/sbin/fsck_ffs/main.c
	@@ -491,6 +491,7 @@
	*/
	if (preen == 0)
	printf("** Phase 5 - Check Cyl groups\n");
	+ snapflush(std_checkblkavail);
	pass5();
	IOstats("Pass5");

	diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c
	--- a/sbin/fsck_ffs/setup.c
	+++ b/sbin/fsck_ffs/setup.c
	@@ -59,6 +59,9 @@
	#include "fsck.h"

	struct inoinfo **inphead, **inpsort; /* info about all inodes */
	+struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */
	+int snapcnt; /* number of active snapshots */
	+char *copybuf; /* buffer to copy snapshot blocks */

	static int sbhashfailed;
	#define POWEROF2(num) (((num) & ((num) - 1)) == 0)
	@@ -66,6 +69,8 @@
	static int calcsb(char *dev, int devfd, struct fs *fs);
	static void saverecovery(int readfd, int writefd);
	static int chkrecovery(int devfd);
	+static int getlbnblkno(struct inodesc *);
	+static int checksnapinfo(struct inode *);

	/*
	* Read in a superblock finding an alternate if necessary.
	@@ -75,7 +80,8 @@
	int
	setup(char *dev)
	{
	- long bmapsize;
	+ long i, bmapsize;
	+ struct inode ip;

	/*
	* We are expected to have an open file descriptor and a superblock.
	@@ -174,6 +180,42 @@
	usedsoftdep = 1;
	else
	usedsoftdep = 0;
	+ /*
	+ * Collect any snapshot inodes so that we can allow them to
	+ * claim any blocks that we free. The code for doing this is
	+ * imported here and into inode.c from sys/ufs/ffs/ffs_snapshot.c.
	+ */
	+ for (snapcnt = 0; snapcnt < FSMAXSNAP; snapcnt++) {
	+ if (sblock.fs_snapinum[snapcnt] == 0)
	+ break;
	+ ginode(sblock.fs_snapinum[snapcnt], &ip);
	+ if ((DIP(ip.i_dp, di_mode) & IFMT) == IFREG &&
	+ (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) != 0 &&
	+ checksnapinfo(&ip)) {
	+ if (debug)
	+ printf("Load snapshot %jd\n",
	+ (intmax_t)sblock.fs_snapinum[snapcnt]);
	+ snaplist[snapcnt] = ip;
	+ continue;
	+ }
	+ printf("Removing non-snapshot inode %ju from snapshot list\n",
	+ (uintmax_t)sblock.fs_snapinum[snapcnt]);
	+ irelse(&ip);
	+ for (i = snapcnt + 1; i < FSMAXSNAP; i++) {
	+ if (sblock.fs_snapinum[i] == 0)
	+ break;
	+ sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
	+ }
	+ sblock.fs_snapinum[i - 1] = 0;
	+ snapcnt--;
	+ sbdirty();
	+ }
	+ if (snapcnt > 0 && copybuf == NULL) {
	+ copybuf = Malloc(sblock.fs_bsize);
	+ if (copybuf == NULL)
	+ errx(EEXIT, "cannot allocate space for snapshot "
	+ "copy buffer");
	+ }
	return (1);

	badsb:
	@@ -181,6 +223,144 @@
	return (0);
	}

	+/*
	+ * Check for valid snapshot information.
	+ *
	+ * Each snapshot has a list of blocks that have been copied. This list
	+ * is consulted before checking the snapshot inode. Its purpose is to
	+ * speed checking of commonly checked blocks and to avoid recursive
	+ * checks of the snapshot inode. In particular, the list must contain
	+ * the superblock, the superblock summary information, and all the
	+ * cylinder group blocks. The list may contain other commonly checked
	+ * pointers such as those of the blocks that contain the snapshot inodes.
	+ * The list is sorted into block order to allow binary search lookup.
	+ *
	+ * The twelve direct block pointers of the snapshot are always
	+ * copied, so we test for them first before checking the list itself
	+ * (i.e., they are not in the list).
	+ *
	+ * The checksnapinfo() routine needs to ensure that the list contains at
	+ * least the super block, its summary information, and the cylinder groups.
	+ * Here we check the list first for the superblock, zero or more cylinder
	+ * groups up to the location of the superblock summary information, the
	+ * summary group information, and any remaining cylinder group maps that
	+ * follow it. We skip over any other entries in the list.
	+ */
	+#define CHKBLKINLIST(chkblk) \
	+ /* All UFS_NDADDR blocks are copied */ \
	+ if ((chkblk) >= UFS_NDADDR) { \
	+ /* Skip over blocks that are not of interest */ \
	+ while (*blkp < (chkblk) && blkp < lastblkp) \
	+ blkp++; \
	+ /* Fail if end of list and not all blocks found */ \
	+ if (blkp >= lastblkp) { \
	+ pwarn("UFS%d snapshot inode %jd failed: " \
	+ "improper block list length (%jd)\n", \
	+ sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, \
	+ (intmax_t)snapip->i_number, \
	+ (intmax_t)(lastblkp - &snapblklist[0])); \
	+ status = 0; \
	+ } \
	+ /* Fail if block we seek is missing */ \
	+ else if (*blkp++ != (chkblk)) { \
	+ pwarn("UFS%d snapshot inode %jd failed: " \
	+ "block list (%jd) != %s (%jd)\n", \
	+ sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, \
	+ (intmax_t)snapip->i_number, \
	+ (intmax_t)blkp[-1], #chkblk, \
	+ (intmax_t)chkblk); \
	+ status = 0; \
	+ } \
	+ }
	+
	+static int
	+checksnapinfo(struct inode *snapip)
	+{
	+ struct fs *fs;
	+ struct bufarea *bp;
	+ struct inodesc idesc;
	+ daddr_t *snapblklist, *blkp, *lastblkp, csblkno;
	+ int cg, loc, len, status;
	+ ufs_lbn_t lbn;
	+ size_t size;
	+
	+ fs = &sblock;
	+ memset(&idesc, 0, sizeof(struct inodesc));
	+ idesc.id_type = ADDR;
	+ idesc.id_func = getlbnblkno;
	+ idesc.id_number = snapip->i_number;
	+ lbn = howmany(fs->fs_size, fs->fs_frag);
	+ idesc.id_parent = lbn; /* sought after blkno */
	+ if ((ckinode(snapip->i_dp, &idesc) & FOUND) == 0)
	+ return (0);
	+ size = fragroundup(fs,
	+ DIP(snapip->i_dp, di_size) - lblktosize(fs, lbn));
	+ bp = getdatablk(idesc.id_parent, size, BT_DATA);
	+ snapblklist = (daddr_t *)bp->b_un.b_buf;
	+ /*
	+ * snapblklist[0] is the size of the list
	+ * snapblklist[1] is the first element of the list
	+ *
	+ * We need to be careful to bound the size of the list and verify
	+ * that we have not run off the end of it if it or its size has
	+ * been corrupted.
	+ */
	+ blkp = &snapblklist[1];
	+ lastblkp = &snapblklist[MAX(0,
	+ MIN(snapblklist[0] + 1, size / sizeof(daddr_t)))];
	+ status = 1;
	+ /* Check that the superblock is listed. */
	+ CHKBLKINLIST(lblkno(fs, fs->fs_sblockloc));
	+ if (status == 0)
	+ goto out;
	+ /*
	+ * Calculate where the summary information is located.
	+ * Usually it is in the first cylinder group, but growfs
	+ * may move it to the first cylinder group that it adds.
	+ *
	+ * Check all cylinder groups up to the summary information.
	+ */
	+ csblkno = fragstoblks(fs, fs->fs_csaddr);
	+ for (cg = 0; cg < fs->fs_ncg; cg++) {
	+ if (fragstoblks(fs, cgtod(fs, cg)) > csblkno)
	+ break;
	+ CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg)));
	+ if (status == 0)
	+ goto out;
	+ }
	+ /* Check the summary information block(s). */
	+ len = howmany(fs->fs_cssize, fs->fs_bsize);
	+ for (loc = 0; loc < len; loc++) {
	+ CHKBLKINLIST(csblkno + loc);
	+ if (status == 0)
	+ goto out;
	+ }
	+ /* Check the remaining cylinder groups. */
	+ for (; cg < fs->fs_ncg; cg++) {
	+ CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg)));
	+ if (status == 0)
	+ goto out;
	+ }
	+out:
	+ brelse(bp);
	+ return (status);
	+}
	+
	+/*
	+ * Return the block number associated with a specified inode lbn.
	+ * Requested lbn is in id_parent. If found, block is returned in
	+ * id_parent.
	+ */
	+static int
	+getlbnblkno(struct inodesc *idesc)
	+{
	+
	+ if (idesc->id_lbn < idesc->id_parent)
	+ return (KEEPON);
	+ idesc->id_parent = idesc->id_blkno;
	+ return (STOP | FOUND);
	+}
	+
	/*
	* Open a device or file to be checked by fsck.
	*/
	diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c
	--- a/sbin/fsck_ffs/suj.c
	+++ b/sbin/fsck_ffs/suj.c
	@@ -321,7 +321,7 @@
	* To be certain we're not freeing a reallocated block we lookup
	* this block in the blk hash and see if there is an allocation
	* journal record that overlaps with any fragments in the block
	- * we're concerned with. If any fragments have ben reallocated
	+ * we're concerned with. If any fragments have been reallocated
	* the block has already been freed and re-used for another purpose.
	*/
	mask = 0;
	@@ -378,6 +378,50 @@
	return (0);
	}

	+/*
	+ * Check to see if the requested block is available.
	+ * We can just check in the cylinder-group maps as
	+ * they will only have usable blocks in them.
	+ */
	+ufs2_daddr_t
	+suj_checkblkavail(blkno, frags)
	+ ufs2_daddr_t blkno;
	+ long frags;
	+{
	+ struct bufarea *cgbp;
	+ struct cg *cgp;
	+ ufs2_daddr_t j, k, baseblk;
	+ long cg;
	+
	+ cg = dtog(&sblock, blkno);
	+ cgbp = cglookup(cg);
	+ cgp = cgbp->b_un.b_cg;
	+ if (!check_cgmagic(cg, cgbp, 0))
	+ return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
	+ baseblk = dtogd(&sblock, blkno);
	+ for (j = 0; j <= sblock.fs_frag - frags; j++) {
	+ if (!isset(cg_blksfree(cgp), baseblk + j))
	+ continue;
	+ for (k = 1; k < frags; k++)
	+ if (!isset(cg_blksfree(cgp), baseblk + j + k))
	+ break;
	+ if (k < frags) {
	+ j += k;
	+ continue;
	+ }
	+ for (k = 0; k < frags; k++)
	+ clrbit(cg_blksfree(cgp), baseblk + j + k);
	+ n_blks += frags;
	+ if (frags == sblock.fs_frag)
	+ cgp->cg_cs.cs_nbfree--;
	+ else
	+ cgp->cg_cs.cs_nffree -= frags;
	+ cgdirty(cgbp);
	+ return ((cg * sblock.fs_fpg) + baseblk + j);
	+ }
	+ return (0);
	+}
	+
	/*
	* Clear an inode from the cg bitmap. If the inode was already clear return
	* 0 so the caller knows it does not have to check the inode contents.
	@@ -420,7 +464,7 @@
	* set in the mask.
	*/
	static void
	-blk_free(ufs2_daddr_t bno, int mask, int frags)
	+blk_free(ino_t ino, ufs2_daddr_t bno, int mask, int frags)
	{
	ufs1_daddr_t fragno, cgbno;
	struct suj_cg *sc;
	@@ -431,6 +475,13 @@
	if (debug)
	printf("Freeing %d frags at blk %jd mask 0x%x\n",
	frags, bno, mask);
	+ /*
	+ * Check to see if the block needs to be claimed by a snapshot.
	+ * If wanted, the snapshot references it. Otherwise we free it.
	+ */
	+ if (snapblkfree(fs, bno, lfragtosize(fs, frags), ino,
	+ suj_checkblkavail))
	+ return;
	cg = dtog(fs, bno);
	sc = cg_lookup(cg);
	cgp = sc->sc_cgp;
	@@ -846,7 +897,7 @@
	blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
	{

	- blk_free(blk, blk_freemask(blk, ino, lbn, frags), frags);
	+ blk_free(ino, blk, blk_freemask(blk, ino, lbn, frags), frags);
	}

	/*
	@@ -865,7 +916,7 @@
	if (lbn <= -UFS_NDADDR && follow && mask == 0)
	indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR);
	else
	- blk_free(blk, mask, frags);
	+ blk_free(ino, blk, mask, frags);
	}

	static void
	@@ -997,6 +1048,8 @@
	if ((DIP(dp, di_mode) & IFMT) == IFDIR)
	ino_visit(dp, ino, ino_free_children, 0);
	DIP_SET(dp, di_nlink, 0);
	+ if ((DIP(dp, di_flags) & SF_SNAPSHOT) != 0)
	+ snapremove(ino);
	ino_visit(dp, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR);
	/* Here we have to clear the inode and release any blocks it holds. */
	gen = DIP(dp, di_gen);
	@@ -1209,7 +1262,7 @@
	continue;
	}
	isdirty = 1;
	- blk_free(nblk, 0, fs->fs_frag);
	+ blk_free(ino, nblk, 0, fs->fs_frag);
	IBLK_SET(bp, i, 0);
	}
	if (isdirty)
	@@ -1245,6 +1298,11 @@
	dp = ip.i_dp;
	mode = DIP(dp, di_mode) & IFMT;
	cursize = DIP(dp, di_size);
	+ /* If no size change, nothing to do */
	+ if (size == cursize) {
	+ irelse(&ip);
	+ return;
	+ }
	if (debug)
	printf("Truncating ino %ju, mode %o to size %jd from size %jd\n",
	(uintmax_t)ino, mode, size, cursize);
	@@ -1264,13 +1322,14 @@
	if (size > 0)
	err_suj("Partial truncation of ino %ju snapshot file\n",
	(uintmax_t)ino);
	+ snapremove(ino);
	}
	lastlbn = lblkno(fs, blkroundup(fs, size));
	for (i = lastlbn; i < UFS_NDADDR; i++) {
	if ((bn = DIP(dp, di_db[i])) == 0)
	continue;
	blksize = sblksize(fs, cursize, i);
	- blk_free(bn, 0, numfrags(fs, blksize));
	+ blk_free(ino, bn, 0, numfrags(fs, blksize));
	DIP_SET(dp, di_db[i], 0);
	}
	/*
	@@ -1283,13 +1342,13 @@
	/* If we're not freeing any in this indirect range skip it. */
	if (lastlbn >= nextlbn)
	continue;
	- if (DIP(dp, di_ib[i]) == 0)
	- continue;
	- indir_trunc(ino, -lbn - i, DIP(dp, di_ib[i]), lastlbn, dp);
	- /* If we freed everything in this indirect free the indir. */
	- if (lastlbn > lbn)
	- continue;
	- blk_free(DIP(dp, di_ib[i]), 0, fs->fs_frag);
	+ if ((bn = DIP(dp, di_ib[i])) == 0)
	+ continue;
	+ indir_trunc(ino, -lbn - i, bn, lastlbn, dp);
	+ /* If we freed everything in this indirect free the indir. */
	+ if (lastlbn > lbn)
	+ continue;
	+ blk_free(ino, bn, 0, fs->fs_frag);
	DIP_SET(dp, di_ib[i], 0);
	}
	/*
	@@ -1319,7 +1378,7 @@
	if (oldspace != newspace) {
	bn += numfrags(fs, newspace);
	frags = numfrags(fs, oldspace - newspace);
	- blk_free(bn, 0, frags);
	+ blk_free(ino, bn, 0, frags);
	totalfrags -= frags;
	}
	}
	@@ -1468,7 +1527,7 @@
	mask >>= frags;
	blk += frags;
	frags = brec->jb_frags - frags;
	- blk_free(blk, mask, frags);
	+ blk_free(brec->jb_ino, blk, mask, frags);
	continue;
	}
	/*
	@@ -2406,6 +2465,13 @@
	}
	if (preen == 0 && (jrecs > 0 || jbytes > 0) && reply("WRITE CHANGES") == 0)
	return (0);
	+ /*
	+ * Check block counts of snapshot inodes and
	+ * make copies of any needed snapshot blocks.
	+ */
	+ for (i = 0; i < snapcnt; i++)
	+ check_blkcnt(&snaplist[i]);
	+ snapflush(suj_checkblkavail);
	/*
	* Recompute the fs summary info from correct cs summaries.
	*/

File Metadata

Mime Type: text/plain
Expires: Mon, Nov 18, 12:16 PM (20 h, 50 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 14695846
Default Alt Text: D36491.id112753.diff (44 KB)

D36491.id112753.diffNo OneTemporaryActions

D36491.id112753.diffView Options

File Metadata

Event Timeline

D36491.id112753.diff
No OneTemporary
Actions

D36491.id112753.diff
View Options