Page MenuHomeFreeBSD

D36491.id111318.diff
No OneTemporary

D36491.id111318.diff

diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c
--- a/sbin/fsck_ffs/dir.c
+++ b/sbin/fsck_ffs/dir.c
@@ -679,7 +679,7 @@
struct bufarea *bp, *nbp;
struct inodesc idesc;
union dinode *dp;
- int indiralloced;
+ long cg, indiralloced;
char *cp;
nbp = NULL;
@@ -687,6 +687,7 @@
pwarn("NO SPACE LEFT IN %s", name);
if (!preen && reply("EXPAND") == 0)
return (0);
+ cg = ino_to_cg(&sblock, ip->i_number);
dp = ip->i_dp;
filesize = DIP(dp, di_size);
lastlbn = lblkno(&sblock, filesize);
@@ -705,7 +706,7 @@
bp = getdirblk(oldblk, lastlbnsize);
if (bp->b_errs)
goto bad;
- if ((newblk = allocblk(sblock.fs_frag)) == 0)
+ if ((newblk = allocblk(cg, sblock.fs_frag, checkblkavail)) == 0)
goto bad;
nbp = getdatablk(newblk, sblock.fs_bsize, BT_DIRDATA);
if (nbp->b_errs)
@@ -731,7 +732,7 @@
printf(" (EXPANDED)\n");
return (1);
}
- if ((newblk = allocblk(sblock.fs_frag)) == 0)
+ if ((newblk = allocblk(cg, sblock.fs_frag, checkblkavail)) == 0)
goto bad;
bp = getdirblk(newblk, sblock.fs_bsize);
if (bp->b_errs)
@@ -749,7 +750,8 @@
* Allocate indirect block if needed.
*/
if ((indirblk = DIP(dp, di_ib[0])) == 0) {
- if ((indirblk = allocblk(sblock.fs_frag)) == 0)
+ indirblk = allocblk(cg, sblock.fs_frag, checkblkavail);
+ if (indirblk == 0)
goto bad;
indiralloced = 1;
}
diff --git a/sbin/fsck_ffs/ea.c b/sbin/fsck_ffs/ea.c
--- a/sbin/fsck_ffs/ea.c
+++ b/sbin/fsck_ffs/ea.c
@@ -74,8 +74,10 @@
blksiz = sblock.fs_fsize;
else
blksiz = sblock.fs_bsize;
- printf("blksiz = %ju\n", (intmax_t)blksiz);
bp = getdatablk(dp->di_extb[0], blksiz, BT_EXTATTR);
+ if (bp->b_errs)
+ return (STOP);
+ printf("blksiz = %ju\n", (intmax_t)blksiz);
cp = (u_char *)bp->b_un.b_buf;
for (n = 0; n < blksiz; n++) {
printf("%02x", cp[n]);
diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h
--- a/sbin/fsck_ffs/fsck.h
+++ b/sbin/fsck_ffs/fsck.h
@@ -200,8 +200,7 @@
#define BT_INODES 7 /* Buffer holds inodes */
#define BT_DIRDATA 8 /* Buffer holds directory data */
#define BT_DATA 9 /* Buffer holds user data */
-#define BT_EMPTY 10 /* Buffer allocated but not filled */
-#define BT_NUMBUFTYPES 11
+#define BT_NUMBUFTYPES 10
#define BT_NAMES { \
"unknown", \
"Superblock", \
@@ -212,8 +211,7 @@
"External Attribute", \
"Inode Block", \
"Directory Contents", \
- "User Data", \
- "Allocated but not filled" }
+ "User Data" }
extern char *buftype[];
#define BT_BUFTYPE(type) \
type < BT_NUMBUFTYPES ? buftype[type] : buftype[BT_UNKNOWN]
@@ -234,7 +232,7 @@
(bp)->b_flags |= B_DIRTY; \
} while (0)
#define initbarea(bp, type) do { \
- (bp)->b_bno = (ufs2_daddr_t)-1; \
+ (bp)->b_bno = (ufs2_daddr_t)-4; \
(bp)->b_size = 0; \
(bp)->b_errs = 0; \
(bp)->b_flags = 0; \
@@ -347,6 +345,7 @@
extern char *cdevname; /* name of device being checked */
extern char ckclean; /* only do work if not cleanly unmounted */
extern int ckhashadd; /* check hashes to be added */
+extern char *copybuf; /* buffer to copy snapshot blocks */
extern int cvtlevel; /* convert to newer file system format */
extern long dev_bsize; /* computed value of DEV_BSIZE */
extern u_int real_dev_bsize; /* actual disk sector size, not overridden */
@@ -371,6 +370,8 @@
extern int returntosingle; /* 1 => return to single user mode on exit */
extern long secsize; /* actual disk sector size */
extern char skipclean; /* skip clean file systems if preening */
+extern int snapcnt; /* number of active snapshots */
+extern struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */
extern char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */
extern int sujrecovery; /* 1 => doing check using the journal */
extern int surrender; /* Give up if reads fail */
@@ -441,7 +442,8 @@
void adjust(struct inodesc *, int lcnt);
void alarmhandler(int sig);
-ufs2_daddr_t allocblk(long frags);
+ufs2_daddr_t allocblk(long cg, long frags, ufs2_daddr_t (*checkblkavail)
+ (ufs2_daddr_t blkno, long frags));
ino_t allocdir(ino_t parent, ino_t request, int mode);
ino_t allocino(ino_t request, int type);
void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk);
@@ -459,11 +461,13 @@
struct bufarea *cglookup(int cg);
int changeino(ino_t dir, const char *name, ino_t newnum);
int check_cgmagic(int cg, struct bufarea *cgbp, int requestrebuild);
+ufs2_daddr_t checkblkavail(ufs2_daddr_t blkno, long frags);
int chkrange(ufs2_daddr_t blk, int cnt);
void ckfini(int markclean);
int ckinode(union dinode *dp, struct inodesc *);
void clri(struct inodesc *, const char *type, int flag);
int clearentry(struct inodesc *);
+void copyonwrite(struct fs *, struct bufarea *);
void direrror(ino_t ino, const char *errmesg);
int dirscan(struct inodesc *);
int dofix(struct inodesc *, const char *msg);
@@ -505,6 +509,7 @@
void pass5(void);
void pfatal(const char *fmt, ...) __printflike(1, 2);
void propagate(void);
+void prtbuf(struct bufarea *, const char *, ...) __printflike(2, 3);
void prtinode(struct inode *);
void pwarn(const char *fmt, ...) __printflike(1, 2);
int readsb(void);
@@ -513,6 +518,10 @@
void sblock_init(void);
void setinodebuf(int, ino_t);
int setup(char *dev);
+int snapblkfree(struct fs *, ufs2_daddr_t, long, ino_t,
+ ufs2_daddr_t (*)(ufs2_daddr_t, long));
+void snapremove(ino_t);
+void snapflush(void);
void gjournal_check(const char *filesys);
int suj_check(const char *filesys);
void update_maps(struct cg *, struct cg*, int);
diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c
--- a/sbin/fsck_ffs/fsutil.c
+++ b/sbin/fsck_ffs/fsutil.c
@@ -71,7 +71,6 @@
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);
-static void prtbuf(const char *, struct bufarea *);
static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
static struct timespec startpass, finishpass;
@@ -79,6 +78,7 @@
int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk; /* backup buffer for cylinder group blocks */
+static struct bufarea failedbuf; /* returned by failed getdatablk() */
static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
static int numbufs; /* size of buffer cache */
@@ -187,6 +187,9 @@
{
int i;
+ initbarea(&failedbuf, BT_UNKNOWN);
+ failedbuf.b_errs = -1;
+ failedbuf.b_un.b_buf = NULL;
if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL)
errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
initbarea(&cgblk, BT_CYLGRP);
@@ -300,7 +303,7 @@
}
/*
- * Manage a cache of directory blocks.
+ * Manage a cache of filesystem disk blocks.
*/
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
@@ -309,16 +312,19 @@
struct bufhash *bhdp;
cachelookups++;
- /* If out of range, return empty buffer with b_err == -1 */
- if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
- blkno = -1;
- type = BT_EMPTY;
- }
+ /*
+ * If out of range, return failedbuf, whose b_errs is -1
+ *
+ * Skip check for inodes because chkrange() considers
+ * metadata areas invalid to write data.
+ */
+ if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize))
+ return (&failedbuf);
bhdp = &bufhashhd[HASH(blkno)];
LIST_FOREACH(bp, bhdp, b_hash)
if (bp->b_bno == fsbtodb(&sblock, blkno)) {
if (debug && bp->b_size != size) {
- prtbuf("getdatablk: size mismatch", bp);
+ prtbuf(bp, "getdatablk: size mismatch");
pfatal("getdatablk: b_size %d != size %ld\n",
bp->b_size, size);
}
@@ -378,7 +384,7 @@
if (debug && bp->b_type != type) {
printf("getdatablk: buffer type changed to %s",
BT_BUFTYPE(type));
- prtbuf("", bp);
+ prtbuf(bp, "");
}
TAILQ_REMOVE(&bufqueuehd, bp, b_list);
TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
@@ -401,11 +407,7 @@
readcnt[bp->b_type]++;
clock_gettime(CLOCK_REALTIME_PRECISE, &start);
}
- if (bp->b_type != BT_EMPTY)
- bp->b_errs =
- blread(fsreadfd, bp->b_un.b_buf, dblk, size);
- else
- bp->b_errs = -1;
+ bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
if (debug) {
clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
timespecsub(&finish, &start, &finish);
@@ -422,7 +424,7 @@
{
if (bp->b_refcnt <= 0)
- prtbuf("brelse: buffer with negative reference count", bp);
+ prtbuf(bp, "brelse: buffer with negative reference count");
bp->b_refcnt--;
}
@@ -451,10 +453,18 @@
if (bp != &sblk)
pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
bp, &sblk);
+ /*
+ * Superblocks are always pre-copied so we do not need
+ * to check them for copy-on-write.
+ */
if (sbput(fd, bp->b_un.b_fs, 0) == 0)
fsmodified = 1;
break;
case BT_CYLGRP:
+ /*
+ * Cylinder groups are always pre-copied so we do not
+ * need to check them for copy-on-write.
+ */
if (sujrecovery)
cg_write(bp);
if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
@@ -483,11 +493,38 @@
}
/* FALLTHROUGH */
default:
+ copyonwrite(&sblock, bp);
blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
break;
}
}
+/*
+ * Finish all outstanding snapshot work.
+ *
+ * When snapshots are active (snapcnt > 0), every buffer on the cache
+ * queue that is flagged B_DIRTY is handed to copyonwrite(), walking
+ * the queue in reverse. Each entry of snaplist is then released with
+ * irelse() and snapcnt is reset to zero. This has to happen before
+ * the cylinder groups are rebuilt so that any block allocations made
+ * for the copies are recorded; the cylinder group maps themselves
+ * were already copied into the snapshots, so nothing further needs
+ * copying afterwards.
+ */
+void
+snapflush(void)
+{
+	struct bufarea *dirtybp;
+	int idx;
+
+	/* Nothing to do without active snapshots. */
+	if (snapcnt <= 0)
+		return;
+	if (debug)
+		printf("Check for snapshot copies\n");
+	TAILQ_FOREACH_REVERSE(dirtybp, &bufqueuehd, bufqueue, b_list) {
+		if ((dirtybp->b_flags & B_DIRTY) == 0)
+			continue;
+		copyonwrite(&sblock, dirtybp);
+	}
+	idx = 0;
+	while (idx < snapcnt) {
+		irelse(&snaplist[idx]);
+		idx++;
+	}
+	snapcnt = 0;
+}
+
+
/*
* Journaled soft updates does not maintain cylinder group summary
* information during cleanup, so this routine recalculates the summary
@@ -503,6 +540,7 @@
int blk;
int i;
+ snapflush();
/*
* Fix the frag and cluster summary.
*/
@@ -587,6 +625,7 @@
(void)close(fsreadfd);
return;
}
+
/*
* To remain idempotent with partial truncations the buffers
* must be flushed in this order:
@@ -629,14 +668,9 @@
case BT_SUPERBLK:
case BT_CYLGRP:
default:
- prtbuf("ckfini: improper buffer type on cache list",bp);
+ prtbuf(bp,"ckfini: improper buffer type on cache list");
continue;
/* These are the ones to flush in this step */
- case BT_EMPTY:
- if (bp->b_bno >= 0)
- pfatal("Unused BT_EMPTY buffer for block %jd\n",
- (intmax_t)bp->b_bno);
- /* FALLTHROUGH */
case BT_LEVEL1:
case BT_LEVEL2:
case BT_LEVEL3:
@@ -649,10 +683,11 @@
continue;
}
if (debug && bp->b_refcnt != 0) {
- prtbuf("ckfini: clearing in-use buffer", bp);
+ prtbuf(bp, "ckfini: clearing in-use buffer");
pfatal("ckfini: clearing in-use buffer\n");
}
TAILQ_REMOVE(&bufqueuehd, bp, b_list);
+ LIST_REMOVE(bp, b_hash);
cnt++;
flush(fswritefd, bp);
free(bp->b_un.b_buf);
@@ -667,10 +702,11 @@
}
TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
if (debug && bp->b_refcnt != 0) {
- prtbuf("ckfini: clearing in-use buffer", bp);
+ prtbuf(bp, "ckfini: clearing in-use buffer");
pfatal("ckfini: clearing in-use buffer\n");
}
TAILQ_REMOVE(&bufqueuehd, bp, b_list);
+ LIST_REMOVE(bp, b_hash);
cnt++;
flush(fswritefd, bp);
free(bp->b_un.b_buf);
@@ -1050,45 +1086,73 @@
* allocate a data block with the specified number of fragments
*/
ufs2_daddr_t
-allocblk(long frags)
+allocblk(long startcg, long frags,
+ ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
- int i, j, k, cg, baseblk;
- struct bufarea *cgbp;
- struct cg *cgp;
+ ufs2_daddr_t blkno, newblk;
if (frags <= 0 || frags > sblock.fs_frag)
return (0);
- for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) {
- for (j = 0; j <= sblock.fs_frag - frags; j++) {
- if (testbmap(i + j))
- continue;
- for (k = 1; k < frags; k++)
- if (testbmap(i + j + k))
- break;
- if (k < frags) {
- j += k;
- continue;
- }
- cg = dtog(&sblock, i + j);
- cgbp = cglookup(cg);
- cgp = cgbp->b_un.b_cg;
- if (!check_cgmagic(cg, cgbp, 0)) {
- i = (cg + 1) * sblock.fs_fpg - sblock.fs_frag;
- continue;
- }
- baseblk = dtogd(&sblock, i + j);
- for (k = 0; k < frags; k++) {
- setbmap(i + j + k);
- clrbit(cg_blksfree(cgp), baseblk + k);
- }
- n_blks += frags;
- if (frags == sblock.fs_frag)
- cgp->cg_cs.cs_nbfree--;
- else
- cgp->cg_cs.cs_nffree -= frags;
- cgdirty(cgbp);
- return (i + j);
+ /*
+ * First pass: offer each block from cgdata() of startcg up to
+ * maxfsblock - fs_frag to the callback. A result of 0 moves on
+ * to the next block, a positive result is the allocated block
+ * and is returned, and a negative result is negated and stored
+ * in blkno before the loop adds its fs_frag increment.
+ */
+ for (blkno = cgdata(&sblock, startcg);
+ blkno < maxfsblock - sblock.fs_frag;
+ blkno += sblock.fs_frag) {
+ if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
+ continue;
+ if (newblk > 0)
+ return (newblk);
+ if (newblk < 0)
+ blkno = -newblk;
+ }
+ /*
+ * Second pass: repeat the same scan from cgdata() of group 0
+ * up to cgbase() of startcg - fs_frag.
+ */
+ for (blkno = cgdata(&sblock, 0);
+ blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
+ blkno += sblock.fs_frag) {
+ if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
+ continue;
+ if (newblk > 0)
+ return (newblk);
+ if (newblk < 0)
+ blkno = -newblk;
+ }
+ /* Neither pass produced a block. */
+ return (0);
+}
+
+/*
+ * Try to allocate frags contiguous fragments inside the block that
+ * starts at blkno, consulting the in-memory block map through
+ * testbmap().
+ *
+ * Returns the address of the first fragment of the run on success,
+ * 0 when no free run of that length exists in the block, or a
+ * negative value when check_cgmagic() rejects the cylinder group.
+ */
+ufs2_daddr_t
+checkblkavail(blkno, frags)
+ ufs2_daddr_t blkno;
+ long frags;
+{
+ struct bufarea *cgbp;
+ struct cg *cgp;
+ ufs2_daddr_t j, k, baseblk;
+ long cg;
+
+ for (j = 0; j <= sblock.fs_frag - frags; j++) {
+ if (testbmap(blkno + j))
+ continue;
+ for (k = 1; k < frags; k++)
+ if (testbmap(blkno + j + k))
+ break;
+ if (k < frags) {
+ j += k;
+ continue;
+ }
+ /* A free run of frags fragments starts at blkno + j. */
+ cg = dtog(&sblock, blkno + j);
+ cgbp = cglookup(cg);
+ cgp = cgbp->b_un.b_cg;
+ /* Negative result: allocblk() negates it to pick where to resume. */
+ if (!check_cgmagic(cg, cgbp, 0))
+ return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
+ baseblk = dtogd(&sblock, blkno + j);
+ /* Mark the run in the block map and clear it in the cg free map. */
+ for (k = 0; k < frags; k++) {
+ setbmap(blkno + j + k);
+ clrbit(cg_blksfree(cgp), baseblk + k);
}
+ n_blks += frags;
+ if (frags == sblock.fs_frag)
+ cgp->cg_cs.cs_nbfree--;
+ else
+ cgp->cg_cs.cs_nffree -= frags;
+ cgdirty(cgbp);
+ return (blkno + j);
}
return (0);
}
@@ -1261,14 +1325,19 @@
/*
* Print details about a buffer.
*/
-static void
-prtbuf(const char *msg, struct bufarea *bp)
+void
+prtbuf(struct bufarea *bp, const char *fmt, ...)
{
-
- printf("%s: bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
- "index %jd\n", msg, bp, BT_BUFTYPE(bp->b_type),
- (intmax_t) bp->b_bno, bp->b_size, bp->b_refcnt,
- bp->b_flags & B_DIRTY ? "dirty" : "clean", (intmax_t) bp->b_index);
+ /*
+ * Print the caller's printf-style message (preceded by the device
+ * name when preening), then the fixed summary of the buffer's
+ * address, type, block number, size, refcnt, dirty flag and index.
+ */
+ va_list ap;
+ va_start(ap, fmt);
+ if (preen)
+ (void)fprintf(stdout, "%s: ", cdevname);
+ (void)vfprintf(stdout, fmt, ap);
+ va_end(ap);
+ printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
+ "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
+ bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
+ (intmax_t) bp->b_index);
}
/*
diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c
--- a/sbin/fsck_ffs/inode.c
+++ b/sbin/fsck_ffs/inode.c
@@ -38,6 +38,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/stat.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
@@ -58,6 +59,8 @@
static int iblock(struct inodesc *, off_t isize, int type);
static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
struct bufarea **);
+static int snapclean(struct inodesc *idesc);
+static void chkcopyonwrite(struct fs *, ufs2_daddr_t);
int
ckinode(union dinode *dp, struct inodesc *idesc)
@@ -378,8 +381,12 @@
int c;
if (cnt <= 0 || blk <= 0 || blk > maxfsblock ||
- cnt - 1 > maxfsblock - blk)
+ cnt - 1 > maxfsblock - blk) {
+ if (debug)
+ printf("out of range: blk %ld, offset %i, size %d\n",
+ (long)blk, (int)fragnum(&sblock, blk), cnt);
return (1);
+ }
if (cnt > sblock.fs_frag ||
fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
if (debug)
@@ -651,10 +658,17 @@
{
struct dups *dlp;
ufs2_daddr_t blkno;
- long nfrags, res;
+ long size, nfrags, res;
res = KEEPON;
blkno = idesc->id_blkno;
+ if (idesc->id_type == SNAP) {
+ pfatal("clearing a snapshot dinode\n");
+ return (STOP);
+ }
+ size = lfragtosize(&sblock, idesc->id_numfrags);
+ if (snapblkfree(&sblock, blkno, size, idesc->id_number, checkblkavail))
+ return (res);
for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
if (chkrange(blkno, 1)) {
res = SKIP;
@@ -677,9 +691,323 @@
return (res);
}
+/*
+ * Prepare a snapshot file for being removed.
+ *
+ * inum must name an inode on the active snapshot list that still has
+ * SF_SNAPSHOT set; otherwise a message is printed and nothing changes.
+ * The inode is dropped from snaplist and sblock.fs_snapinum (later
+ * entries slide down one slot), its blocks are walked with snapclean()
+ * through ckinode(), and SF_SNAPSHOT is cleared before the inode is
+ * marked dirty and released.
+ */
+void
+snapremove(ino_t inum)
+{
+	struct inodesc idesc;
+	struct inode ip;
+	int i;
+
+	for (i = 0; i < snapcnt; i++)
+		if (snaplist[i].i_number == inum)
+			break;
+	/*
+	 * snaplist has FSMAXSNAP + 1 slots, so this copy stays in bounds
+	 * even when the search failed; ip is only used if it succeeded.
+	 */
+	ip = snaplist[i];
+	if (i == snapcnt || (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
+		printf("snapremove: inode %jd is not a snapshot\n",
+		    (intmax_t)inum);
+		return;
+	}
+	/*
+	 * Remove from active snapshot list.
+	 */
+	for (i++; i < FSMAXSNAP; i++) {
+		if (sblock.fs_snapinum[i] == 0)
+			break;
+		snaplist[i - 1] = snaplist[i];
+		sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
+	}
+	sblock.fs_snapinum[i - 1] = 0;
+	bzero(&snaplist[i - 1], sizeof(struct inode));
+	snapcnt--;
+	/*
+	 * Start from a fully zeroed descriptor so ckinode() never sees
+	 * stack garbage in the fields that are not set explicitly below.
+	 */
+	bzero(&idesc, sizeof(struct inodesc));
+	idesc.id_type = SNAP;
+	idesc.id_func = snapclean;
+	idesc.id_number = inum;
+	(void)ckinode(ip.i_dp, &idesc);
+	DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
+	inodirty(&ip);
+	irelse(&ip);
+}
+
+/*
+ * Per-block callback that snapremove() installs as id_func before
+ * walking a snapshot inode with ckinode().
+ *
+ * The block pointer is zeroed when it is BLK_NOCOPY or BLK_SNAP, or
+ * when the block number equals blkstofrags() of its own lbn and
+ * snapblkfree() returns non-zero for it. Always returns KEEPON.
+ */
+static int
+snapclean(struct inodesc *idesc)
+{
+ ufs2_daddr_t blkno;
+ struct bufarea *bp;
+ union dinode *dp;
+
+ blkno = idesc->id_blkno;
+ if (blkno == 0)
+ return (KEEPON);
+
+ bp = idesc->id_bp;
+ dp = idesc->id_dp;
+ if (blkno == BLK_NOCOPY || blkno == BLK_SNAP ||
+ (blkno == blkstofrags(&sblock, idesc->id_lbn) &&
+ snapblkfree(&sblock, blkno, sblock.fs_bsize, idesc->id_number,
+ checkblkavail))) {
+ /* Direct pointers live in the dinode, the rest in the indirect buffer. */
+ if (idesc->id_lbn < UFS_NDADDR)
+ DIP_SET(dp, di_db[idesc->id_lbn], 0);
+ else
+ IBLK_SET(bp, bp->b_index, 0);
+ /* A block numbered like its lbn was counted in di_blocks; uncount it. */
+ if (blkno == blkstofrags(&sblock, idesc->id_lbn))
+ DIP_SET(dp, di_blocks, DIP(dp, di_blocks) -
+ btodb(sblock.fs_bsize));
+ /*
+ * NOTE(review): dirty(bp) also runs for direct blocks
+ * (id_lbn < UFS_NDADDR); confirm id_bp is valid in that case.
+ */
+ dirty(bp);
+ }
+ return (KEEPON);
+}
+
+/*
+ * Notification that a block is being freed. Return zero if the free
+ * should be allowed to proceed. Return non-zero if the snapshot file
+ * wants to claim the block. The block will be claimed if it is an
+ * uncopied part of one of the snapshots. It will be freed if it is
+ * either a BLK_NOCOPY or has already been copied in all of the snapshots.
+ * If a fragment is being freed, then all snapshots that care about
+ * it must make a copy since a snapshot file can only claim full sized
+ * blocks. Note that if more than one snapshot file maps the block,
+ * we can pick one at random to claim it. Since none of the snapshots
+ * can change, we are assured that they will all see the same unmodified
+ * image. When deleting a snapshot file (see snapremove() and snapclean()
+ * above), we must push any of these claimed blocks to one of the other
+ * snapshots that maps it. These claimed blocks are easily identified as
+ * they will have a block number equal to their logical block number
+ * within the snapshot. A copied block can never have this property
+ * because they must always have been allocated from a BLK_NOCOPY location.
+ *
+ * fs            superblock of the filesystem being checked
+ * bno           first fragment of the range being freed
+ * size          number of bytes being freed; a value equal to
+ *               fs->fs_bsize lets a snapshot claim the block outright
+ * inum          inode number handed to ino_blkatoff() and shown in debug
+ *               output; 0 means use each snapshot's own inode number
+ * checkblkavail allocation callback passed through to allocblk() when
+ *               a copy of the block has to be made
+ *
+ * Returns 1 when a snapshot claimed the block, 0 otherwise.
+ */
+int
+snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
+	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
+{
+	union dinode *dp;
+	struct inode ip;
+	struct bufarea *snapbp;
+	ufs_lbn_t lbn;
+	ufs2_daddr_t blkno, relblkno;
+	int i, frags, claimedblk, copydone;
+
+	/* If no snapshots, nothing to do */
+	if (snapcnt == 0)
+		return (0);
+	if (debug)
+		printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n",
+		    (intmax_t)inum, (intmax_t)bno, (intmax_t)size);
+	relblkno = blknum(fs, bno);
+	lbn = fragstoblks(fs, relblkno);
+	/* Direct blocks are always pre-copied */
+	if (lbn < UFS_NDADDR)
+		return (0);
+	copydone = 0;
+	claimedblk = 0;
+	for (i = 0; i < snapcnt; i++) {
+		/*
+		 * Lookup block being freed.
+		 */
+		ip = snaplist[i];
+		dp = ip.i_dp;
+		blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
+		    lbn, &frags, &snapbp);
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		if (blkno == 0) {
+			/*
+			 * A block that we map is being freed. If it has not
+			 * been claimed yet, we will claim or copy it (below).
+			 */
+			claimedblk = 1;
+		} else if (blkno == BLK_SNAP) {
+			/*
+			 * No previous snapshot claimed the block,
+			 * so it will be freed and become a BLK_NOCOPY
+			 * (don't care) for us.
+			 */
+			if (claimedblk)
+				pfatal("snapblkfree: inconsistent block type");
+			IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
+			dirty(snapbp);
+			brelse(snapbp);
+			continue;
+		} else /* BLK_NOCOPY or default */ {
+			/*
+			 * If the snapshot has already copied the block
+			 * (default), or does not care about the block,
+			 * it is not needed.
+			 */
+			brelse(snapbp);
+			continue;
+		}
+		/*
+		 * If this is a full size block, we will just grab it
+		 * and assign it to the snapshot inode. Otherwise we
+		 * will proceed to copy it. See explanation for this
+		 * routine as to why only a single snapshot needs to
+		 * claim this block.
+		 */
+		if (size == fs->fs_bsize) {
+			if (debug)
+				printf("Grabonremove snapshot %ju lbn %jd "
+				    "from inum %ju\n", (intmax_t)ip.i_number,
+				    (intmax_t)lbn, (uintmax_t)inum);
+			IBLK_SET(snapbp, snapbp->b_index, relblkno);
+			dirty(snapbp);
+			brelse(snapbp);
+			DIP_SET(dp, di_blocks,
+			    DIP(dp, di_blocks) + btodb(size));
+			inodirty(&ip);
+			return (1);
+		}
+
+		/*
+		 * First time through, read the contents of the old block.
+		 * NOTE(review): copydone latches before the read is known
+		 * to have succeeded, and this error path (like the
+		 * allocation failure below) continues without brelse() of
+		 * snapbp; confirm both are intended.
+		 */
+		if (copydone == 0) {
+			copydone = 1;
+			if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
+			    fs->fs_bsize) != 0) {
+				pfatal("Could not read snapshot %ju block "
+				    "%jd\n", (intmax_t)ip.i_number,
+				    (intmax_t)relblkno);
+				continue;
+			}
+		}
+		/*
+		 * This allocation will never require any additional
+		 * allocations for the snapshot inode.
+		 */
+		blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
+		    checkblkavail);
+		if (blkno == 0) {
+			pfatal("Could not allocate block for snapshot %ju\n",
+			    (intmax_t)ip.i_number);
+			continue;
+		}
+		if (debug)
+			printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
+			    "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
+			    (intmax_t)lbn, (uintmax_t)inum, size,
+			    (intmax_t)blkno);
+		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
+		IBLK_SET(snapbp, snapbp->b_index, blkno);
+		dirty(snapbp);
+		brelse(snapbp);
+		DIP_SET(dp, di_blocks,
+		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
+		inodirty(&ip);
+	}
+	return (0);
+}
+
+/*
+ * Notification that a block is being written. The block will be
+ * copied in all of the snapshots that are tracking it and have not
+ * yet copied it. Some buffers may hold more than one block, so each
+ * filesystem block covered by the buffer is passed to chkcopyonwrite()
+ * in turn. Does nothing when no snapshots are active (snapcnt == 0).
+ *
+ * fs	superblock of the filesystem being checked
+ * bp	buffer about to be written; b_bno and b_size select the blocks
+ *
+ * NOTE(review): earlier wording of this comment said the routine returns
+ * early for blocks that belong to a snapshot; no such check is present
+ * in this function.
+ */
+void
+copyonwrite(struct fs *fs, struct bufarea *bp)
+{
+	ufs2_daddr_t copyblkno;
+	long i, numblks;
+
+	/* If no snapshots, nothing to do. */
+	if (snapcnt == 0)
+		return;
+	numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
+	/* %jd requires an intmax_t argument, so widen the long count. */
+	if (debug)
+		prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
+		    (intmax_t)numblks, numblks > 1 ? "s" : "");
+	copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
+	for (i = 0; i < numblks; i++) {
+		chkcopyonwrite(fs, copyblkno);
+		copyblkno += fs->fs_frag;
+	}
+}
+
+/*
+ * Make sure every active snapshot that still maps copyblkno to 0 gets
+ * its own copy of that block before the block is overwritten.
+ *
+ * The old contents are read into copybuf once, on the first snapshot
+ * that needs them. For each such snapshot a new block is allocated
+ * near copyblkno's cylinder group, the saved contents are written to
+ * it, and the snapshot's block pointer and di_blocks are updated.
+ * Blocks whose lbn is below UFS_NDADDR are skipped.
+ */
+static void
+chkcopyonwrite(fs, copyblkno)
+ struct fs *fs;
+ ufs2_daddr_t copyblkno;
+{
+ struct inode ip;
+ union dinode *dp;
+ struct bufarea *snapbp;
+ ufs2_daddr_t blkno;
+ int i, frags, copydone;
+ ufs_lbn_t lbn;
+
+ lbn = fragstoblks(fs, copyblkno);
+ /* Direct blocks are always pre-copied */
+ if (lbn < UFS_NDADDR)
+ return;
+ copydone = 0;
+ for (i = 0; i < snapcnt; i++) {
+ /*
+ * Look up the block about to be written in this snapshot.
+ */
+ ip = snaplist[i];
+ dp = ip.i_dp;
+ blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
+ /*
+ * Check to see if block needs to be copied.
+ */
+ if (blkno != 0) {
+ /*
+ * A block that we have already copied or don't track.
+ */
+ brelse(snapbp);
+ continue;
+ }
+ /*
+ * First time through, read the contents of the old block.
+ * NOTE(review): this error path and the allocation failure
+ * below continue without brelse() of snapbp; confirm intended.
+ */
+ if (copydone == 0) {
+ copydone = 1;
+ if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
+ fs->fs_bsize) != 0) {
+ pfatal("Could not read snapshot %ju block "
+ "%jd\n", (intmax_t)ip.i_number,
+ (intmax_t)copyblkno);
+ continue;
+ }
+ }
+ /*
+ * This allocation will never require any additional
+ * allocations for the snapshot inode.
+ */
+ if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
+ checkblkavail)) == 0) {
+ pfatal("Could not allocate block for snapshot %ju\n",
+ (intmax_t)ip.i_number);
+ continue;
+ }
+ if (debug)
+ prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
+ "blkno %ju setting in buffer",
+ (intmax_t)ip.i_number, (intmax_t)lbn,
+ (intmax_t)blkno);
+ /* Save the old contents, then point the snapshot at the copy. */
+ blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
+ IBLK_SET(snapbp, snapbp->b_index, blkno);
+ dirty(snapbp);
+ brelse(snapbp);
+ DIP_SET(dp, di_blocks,
+ DIP(dp, di_blocks) + btodb(fs->fs_bsize));
+ inodirty(&ip);
+ }
+ return;
+}
+
void
freeinodebuf(void)
{
+ struct bufarea *bp;
+ int i;
/*
* Flush old contents in case they have been updated.
@@ -689,6 +1017,14 @@
free((char *)inobuf.b_un.b_buf);
inobuf.b_un.b_buf = NULL;
firstinum = lastinum = 0;
+ /*
+ * Reload the snapshot inodes in case any of them changed.
+ */
+ for (i = 0; i < snapcnt; i++) {
+ bp = snaplist[i].i_bp;
+ bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
+ bp->b_size);
+ }
}
/*
@@ -803,6 +1139,10 @@
printf(" (CLEARED)\n");
n_files--;
if (bkgrdflag == 0) {
+ if (idesc->id_type == SNAP) {
+ snapremove(idesc->id_number);
+ idesc->id_type = ADDR;
+ }
(void)ckinode(dp, idesc);
inoinfo(idesc->id_number)->ino_state = USTATE;
clearinode(dp);
@@ -967,7 +1307,8 @@
cgdirty(cgbp);
ginode(ino, &ip);
dp = ip.i_dp;
- DIP_SET(dp, di_db[0], allocblk((long)1));
+ DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
+ checkblkavail));
if (DIP(dp, di_db[0]) == 0) {
inoinfo(ino)->ino_state = USTATE;
irelse(&ip);
diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c
--- a/sbin/fsck_ffs/main.c
+++ b/sbin/fsck_ffs/main.c
@@ -491,6 +491,7 @@
*/
if (preen == 0)
printf("** Phase 5 - Check Cyl groups\n");
+ snapflush();
pass5();
IOstats("Pass5");
diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c
--- a/sbin/fsck_ffs/setup.c
+++ b/sbin/fsck_ffs/setup.c
@@ -59,6 +59,9 @@
#include "fsck.h"
struct inoinfo **inphead, **inpsort; /* info about all inodes */
+struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */
+int snapcnt; /* number of active snapshots */
+char *copybuf; /* buffer to copy snapshot blocks */
static int sbhashfailed;
#define POWEROF2(num) (((num) & ((num) - 1)) == 0)
@@ -66,6 +69,8 @@
static int calcsb(char *dev, int devfd, struct fs *fs);
static void saverecovery(int readfd, int writefd);
static int chkrecovery(int devfd);
+static int getlbnblkno(struct inodesc *);
+static int checksnapinfo(struct inode *);
/*
* Read in a superblock finding an alternate if necessary.
@@ -75,7 +80,8 @@
int
setup(char *dev)
{
- long bmapsize;
+ long i, bmapsize;
+ struct inode ip;
/*
* We are expected to have an open file descriptor and a superblock.
@@ -174,6 +180,39 @@
usedsoftdep = 1;
else
usedsoftdep = 0;
+ /*
+ * Collect any snapshot inodes so that we can allow them to
+ * claim any blocks that we free. The code for doing this is
+ * imported here and into inode.c from sys/ufs/ffs/ffs_snapshot.c.
+ */
+ for (snapcnt = 0; snapcnt < FSMAXSNAP; snapcnt++) {
+ if (sblock.fs_snapinum[snapcnt] == 0)
+ break;
+ ginode(sblock.fs_snapinum[snapcnt], &ip);
+ if ((DIP(ip.i_dp, di_mode) & IFMT) == IFREG &&
+ (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) != 0 &&
+ checksnapinfo(&ip)) {
+ snaplist[snapcnt] = ip;
+ continue;
+ }
+ printf("Removing non-snapshot inode %ju from snapshot list\n",
+ (uintmax_t)sblock.fs_snapinum[snapcnt]);
+ irelse(&ip);
+ for (i = snapcnt + 1; i < FSMAXSNAP; i++) {
+ if (sblock.fs_snapinum[i] == 0)
+ break;
+ sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
+ }
+ sblock.fs_snapinum[i - 1] = 0;
+ snapcnt--;
+ sbdirty();
+ }
+ if (snapcnt > 0 && copybuf == NULL) {
+ copybuf = Malloc(sblock.fs_bsize);
+ if (copybuf == NULL)
+ errx(EEXIT, "cannot allocate space for snapshot "
+ "copy buffer");
+ }
return (1);
badsb:
@@ -181,6 +220,100 @@
return (0);
}
+/*
+ * Check for valid snapshot information.
+ *
+ * CHKBLKINLIST(chkblk) verifies that chkblk appears in the snapshot's
+ * block list. Values below UFS_NDADDR are accepted without a lookup.
+ * Otherwise blkp is advanced past entries smaller than chkblk; running
+ * off the list, or landing on an entry that differs from chkblk, emits
+ * a warning, sets status to 0 and jumps to the fail label.
+ *
+ * The macro expects these names in the enclosing scope: blkp, lastblkp,
+ * snapblklist, snapip, status and a fail: label. The bound on blkp is
+ * tested before *blkp is read so the entry at lastblkp is never
+ * dereferenced.
+ */
+#define CHKBLKINLIST(chkblk)						\
+	if ((chkblk) >= UFS_NDADDR) {					\
+		while (blkp < lastblkp && *blkp < (chkblk))		\
+			blkp++;						\
+		if (blkp >= lastblkp) {					\
+			pwarn("UFS%d snapshot inode %jd failed: "	\
+			    "improper block list length (%jd)\n",	\
+			    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2,	\
+			    (intmax_t)snapip->i_number,			\
+			    (intmax_t)(lastblkp - &snapblklist[0]));	\
+			status = 0;					\
+			goto fail;					\
+		}							\
+		if (*blkp++ != (chkblk)) {				\
+			pwarn("UFS%d snapshot inode %jd failed: "	\
+			    "block list (%jd) != %s (%jd)\n",		\
+			    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2,	\
+			    (intmax_t)snapip->i_number,			\
+			    (intmax_t)blkp[-1], #chkblk,		\
+			    (intmax_t)(chkblk));			\
+			status = 0;					\
+			goto fail;					\
+		}							\
+	}
+
+/*
+ * Decide whether snapip carries a usable snapshot block list.
+ *
+ * The block holding the list is located by asking ckinode() (through
+ * getlbnblkno()) for the lbn just past the filesystem's last block.
+ * The list is then checked, in order, for the superblock, each
+ * cylinder group located at or before the cylinder summary area, the
+ * summary blocks themselves, and the remaining cylinder groups.
+ *
+ * Returns 1 when every expected block is listed, 0 when the list block
+ * cannot be found or read, or when any CHKBLKINLIST check fails.
+ */
+static int
+checksnapinfo(struct inode *snapip)
+{
+	struct fs *fs;
+	struct bufarea *bp;
+	struct inodesc idesc;
+	daddr_t *snapblklist, *blkp, *lastblkp, csblkno;
+	int cg, loc, len, status;
+	ufs_lbn_t lbn;
+	size_t size;
+
+	fs = &sblock;
+	memset(&idesc, 0, sizeof(struct inodesc));
+	idesc.id_type = ADDR;
+	idesc.id_func = getlbnblkno;
+	idesc.id_number = snapip->i_number;
+	lbn = howmany(fs->fs_size, fs->fs_frag);
+	idesc.id_parent = lbn;		/* sought after blkno */
+	if ((ckinode(snapip->i_dp, &idesc) & FOUND) == 0)
+		return (0);
+	size = fragroundup(fs,
+	    DIP(snapip->i_dp, di_size) - lblktosize(fs, lbn));
+	bp = getdatablk(idesc.id_parent, size, BT_DATA);
+	/*
+	 * getdatablk() reports failure through b_errs, and the buffer it
+	 * hands back for an out-of-range block has a NULL b_buf, so the
+	 * list must not be touched on error.
+	 * NOTE(review): this mirrors the early return after getdatablk()
+	 * in ea.c; confirm whether a buffer whose read failed still needs
+	 * a brelse().
+	 */
+	if (bp->b_errs != 0)
+		return (0);
+	snapblklist = (daddr_t *)bp->b_un.b_buf;
+	/*
+	 * Check that superblock and all cylinder groups are listed.
+	 * snapblklist[0] is used as the length of the list and entries
+	 * start at snapblklist[1]; the end pointer is clamped to the
+	 * number of entries that fit in the buffer.
+	 */
+	status = 1;
+	blkp = &snapblklist[1];
+	lastblkp = &snapblklist[MAX(0,
+	    MIN(snapblklist[0] + 1, size / sizeof(daddr_t)))];
+	CHKBLKINLIST(lblkno(fs, fs->fs_sblockloc));
+	csblkno = fragstoblks(fs, fs->fs_csaddr);
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		if (fragstoblks(fs, cgtod(fs, cg)) > csblkno)
+			break;
+		CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg)));
+	}
+	len = howmany(fs->fs_cssize, fs->fs_bsize);
+	for (loc = 0; loc < len; loc++)
+		CHKBLKINLIST(csblkno + loc);
+	for (; cg < fs->fs_ncg; cg++)
+		CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg)));
+fail:
+	brelse(bp);
+	return (status);
+}
+
+/*
+ * ckinode() callback that looks up the block backing one lbn.
+ *
+ * On entry id_parent holds the lbn being sought. Blocks whose id_lbn
+ * is still below it are passed over with KEEPON. At the first block
+ * at or beyond it, id_parent is overwritten with that block's number
+ * (id_blkno) and the walk ends with STOP | FOUND.
+ */
+static int
+getlbnblkno(struct inodesc *idesc)
+{
+	int verdict;
+
+	if (idesc->id_lbn >= idesc->id_parent) {
+		idesc->id_parent = idesc->id_blkno;
+		verdict = STOP | FOUND;
+	} else {
+		verdict = KEEPON;
+	}
+	return (verdict);
+}
+
/*
* Open a device or file to be checked by fsck.
*/
diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c
--- a/sbin/fsck_ffs/suj.c
+++ b/sbin/fsck_ffs/suj.c
@@ -321,7 +321,7 @@
* To be certain we're not freeing a reallocated block we lookup
* this block in the blk hash and see if there is an allocation
* journal record that overlaps with any fragments in the block
- * we're concerned with. If any fragments have ben reallocated
+ * we're concerned with. If any fragments have been reallocated
* the block has already been freed and re-used for another purpose.
*/
mask = 0;
@@ -378,6 +378,50 @@
return (0);
}
+/*
+ * Allocation callback used while recovering from the journal.
+ *
+ * Looks for frags contiguous free fragments inside the block starting
+ * at blkno by testing the cylinder group's free map directly; the
+ * cylinder-group maps only have usable blocks in them. On success the
+ * run is cleared from the free map, n_blks and the group's free
+ * counters are adjusted, the group is marked dirty and the address of
+ * the run's first fragment is returned. Returns 0 when the block has
+ * no such run, or a negative value when check_cgmagic() rejects the
+ * cylinder group.
+ */
+static ufs2_daddr_t
+suj_checkblkavail(ufs2_daddr_t blkno, long frags)
+{
+	struct bufarea *cgbuf;
+	struct cg *cgrp;
+	ufs2_daddr_t start, run, cgoff;
+	long cg;
+
+	cg = dtog(&sblock, blkno);
+	cgbuf = cglookup(cg);
+	cgrp = cgbuf->b_un.b_cg;
+	if (!check_cgmagic(cg, cgbuf, 0))
+		return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
+	cgoff = dtogd(&sblock, blkno);
+	for (start = 0; start <= sblock.fs_frag - frags; start++) {
+		if (!isset(cg_blksfree(cgrp), cgoff + start))
+			continue;
+		/* Measure how far the free run extends from start. */
+		run = 1;
+		while (run < frags &&
+		    isset(cg_blksfree(cgrp), cgoff + start + run))
+			run++;
+		if (run < frags) {
+			/* Too short: resume just past the used fragment. */
+			start += run;
+			continue;
+		}
+		for (run = 0; run < frags; run++)
+			clrbit(cg_blksfree(cgrp), cgoff + start + run);
+		n_blks += frags;
+		if (frags != sblock.fs_frag)
+			cgrp->cg_cs.cs_nffree -= frags;
+		else
+			cgrp->cg_cs.cs_nbfree--;
+		cgdirty(cgbuf);
+		return ((cg * sblock.fs_fpg) + cgoff + start);
+	}
+	return (0);
+}
+
/*
* Clear an inode from the cg bitmap. If the inode was already clear return
* 0 so the caller knows it does not have to check the inode contents.
@@ -431,6 +475,12 @@
if (debug)
printf("Freeing %d frags at blk %jd mask 0x%x\n",
frags, bno, mask);
+ /*
+ * Check to see if the block needs to be claimed by a snapshot.
+ * If wanted, the snapshot references it. Otherwise we free it.
+ */
+ if (snapblkfree(fs, bno, lfragtosize(fs, frags), 0, suj_checkblkavail))
+ return;
cg = dtog(fs, bno);
sc = cg_lookup(cg);
cgp = sc->sc_cgp;
@@ -1264,6 +1314,7 @@
if (size > 0)
err_suj("Partial truncation of ino %ju snapshot file\n",
(uintmax_t)ino);
+ snapremove(ino);
}
lastlbn = lblkno(fs, blkroundup(fs, size));
for (i = lastlbn; i < UFS_NDADDR; i++) {
@@ -1283,13 +1334,13 @@
/* If we're not freeing any in this indirect range skip it. */
if (lastlbn >= nextlbn)
continue;
- if (DIP(dp, di_ib[i]) == 0)
- continue;
- indir_trunc(ino, -lbn - i, DIP(dp, di_ib[i]), lastlbn, dp);
- /* If we freed everything in this indirect free the indir. */
- if (lastlbn > lbn)
- continue;
- blk_free(DIP(dp, di_ib[i]), 0, fs->fs_frag);
+ if ((bn = DIP(dp, di_ib[i])) == 0)
+ continue;
+ indir_trunc(ino, -lbn - i, bn, lastlbn, dp);
+ /* If we freed everything in this indirect free the indir. */
+ if (lastlbn > lbn)
+ continue;
+ blk_free(bn, 0, fs->fs_frag);
DIP_SET(dp, di_ib[i], 0);
}
/*

File Metadata

Mime Type
text/plain
Expires
Sun, Jan 12, 10:56 AM (56 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15768722
Default Alt Text
D36491.id111318.diff (34 KB)

Event Timeline