Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F107192325
D36491.id111318.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
34 KB
Referenced Files
None
Subscribers
None
D36491.id111318.diff
View Options
diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c
--- a/sbin/fsck_ffs/dir.c
+++ b/sbin/fsck_ffs/dir.c
@@ -679,7 +679,7 @@
struct bufarea *bp, *nbp;
struct inodesc idesc;
union dinode *dp;
- int indiralloced;
+ long cg, indiralloced;
char *cp;
nbp = NULL;
@@ -687,6 +687,7 @@
pwarn("NO SPACE LEFT IN %s", name);
if (!preen && reply("EXPAND") == 0)
return (0);
+ cg = ino_to_cg(&sblock, ip->i_number);
dp = ip->i_dp;
filesize = DIP(dp, di_size);
lastlbn = lblkno(&sblock, filesize);
@@ -705,7 +706,7 @@
bp = getdirblk(oldblk, lastlbnsize);
if (bp->b_errs)
goto bad;
- if ((newblk = allocblk(sblock.fs_frag)) == 0)
+ if ((newblk = allocblk(cg, sblock.fs_frag, checkblkavail)) == 0)
goto bad;
nbp = getdatablk(newblk, sblock.fs_bsize, BT_DIRDATA);
if (nbp->b_errs)
@@ -731,7 +732,7 @@
printf(" (EXPANDED)\n");
return (1);
}
- if ((newblk = allocblk(sblock.fs_frag)) == 0)
+ if ((newblk = allocblk(cg, sblock.fs_frag, checkblkavail)) == 0)
goto bad;
bp = getdirblk(newblk, sblock.fs_bsize);
if (bp->b_errs)
@@ -749,7 +750,8 @@
* Allocate indirect block if needed.
*/
if ((indirblk = DIP(dp, di_ib[0])) == 0) {
- if ((indirblk = allocblk(sblock.fs_frag)) == 0)
+ indirblk = allocblk(cg, sblock.fs_frag, checkblkavail);
+ if (indirblk == 0)
goto bad;
indiralloced = 1;
}
diff --git a/sbin/fsck_ffs/ea.c b/sbin/fsck_ffs/ea.c
--- a/sbin/fsck_ffs/ea.c
+++ b/sbin/fsck_ffs/ea.c
@@ -74,8 +74,10 @@
blksiz = sblock.fs_fsize;
else
blksiz = sblock.fs_bsize;
- printf("blksiz = %ju\n", (intmax_t)blksiz);
bp = getdatablk(dp->di_extb[0], blksiz, BT_EXTATTR);
+ if (bp->b_errs)
+ return (STOP);
+ printf("blksiz = %ju\n", (intmax_t)blksiz);
cp = (u_char *)bp->b_un.b_buf;
for (n = 0; n < blksiz; n++) {
printf("%02x", cp[n]);
diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h
--- a/sbin/fsck_ffs/fsck.h
+++ b/sbin/fsck_ffs/fsck.h
@@ -200,8 +200,7 @@
#define BT_INODES 7 /* Buffer holds inodes */
#define BT_DIRDATA 8 /* Buffer holds directory data */
#define BT_DATA 9 /* Buffer holds user data */
-#define BT_EMPTY 10 /* Buffer allocated but not filled */
-#define BT_NUMBUFTYPES 11
+#define BT_NUMBUFTYPES 10
#define BT_NAMES { \
"unknown", \
"Superblock", \
@@ -212,8 +211,7 @@
"External Attribute", \
"Inode Block", \
"Directory Contents", \
- "User Data", \
- "Allocated but not filled" }
+ "User Data" }
extern char *buftype[];
#define BT_BUFTYPE(type) \
type < BT_NUMBUFTYPES ? buftype[type] : buftype[BT_UNKNOWN]
@@ -234,7 +232,7 @@
(bp)->b_flags |= B_DIRTY; \
} while (0)
#define initbarea(bp, type) do { \
- (bp)->b_bno = (ufs2_daddr_t)-1; \
+ (bp)->b_bno = (ufs2_daddr_t)-4; \
(bp)->b_size = 0; \
(bp)->b_errs = 0; \
(bp)->b_flags = 0; \
@@ -347,6 +345,7 @@
extern char *cdevname; /* name of device being checked */
extern char ckclean; /* only do work if not cleanly unmounted */
extern int ckhashadd; /* check hashes to be added */
+extern char *copybuf; /* buffer to copy snapshot blocks */
extern int cvtlevel; /* convert to newer file system format */
extern long dev_bsize; /* computed value of DEV_BSIZE */
extern u_int real_dev_bsize; /* actual disk sector size, not overridden */
@@ -371,6 +370,8 @@
extern int returntosingle; /* 1 => return to single user mode on exit */
extern long secsize; /* actual disk sector size */
extern char skipclean; /* skip clean file systems if preening */
+extern int snapcnt; /* number of active snapshots */
+extern struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */
extern char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */
extern int sujrecovery; /* 1 => doing check using the journal */
extern int surrender; /* Give up if reads fail */
@@ -441,7 +442,8 @@
void adjust(struct inodesc *, int lcnt);
void alarmhandler(int sig);
-ufs2_daddr_t allocblk(long frags);
+ufs2_daddr_t allocblk(long cg, long frags, ufs2_daddr_t (*checkblkavail)
+ (ufs2_daddr_t blkno, long frags));
ino_t allocdir(ino_t parent, ino_t request, int mode);
ino_t allocino(ino_t request, int type);
void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk);
@@ -459,11 +461,13 @@
struct bufarea *cglookup(int cg);
int changeino(ino_t dir, const char *name, ino_t newnum);
int check_cgmagic(int cg, struct bufarea *cgbp, int requestrebuild);
+ufs2_daddr_t checkblkavail(ufs2_daddr_t blkno, long frags);
int chkrange(ufs2_daddr_t blk, int cnt);
void ckfini(int markclean);
int ckinode(union dinode *dp, struct inodesc *);
void clri(struct inodesc *, const char *type, int flag);
int clearentry(struct inodesc *);
+void copyonwrite(struct fs *, struct bufarea *);
void direrror(ino_t ino, const char *errmesg);
int dirscan(struct inodesc *);
int dofix(struct inodesc *, const char *msg);
@@ -505,6 +509,7 @@
void pass5(void);
void pfatal(const char *fmt, ...) __printflike(1, 2);
void propagate(void);
+void prtbuf(struct bufarea *, const char *, ...) __printflike(2, 3);
void prtinode(struct inode *);
void pwarn(const char *fmt, ...) __printflike(1, 2);
int readsb(void);
@@ -513,6 +518,10 @@
void sblock_init(void);
void setinodebuf(int, ino_t);
int setup(char *dev);
+int snapblkfree(struct fs *, ufs2_daddr_t, long, ino_t,
+ ufs2_daddr_t (*)(ufs2_daddr_t, long));
+void snapremove(ino_t);
+void snapflush(void);
void gjournal_check(const char *filesys);
int suj_check(const char *filesys);
void update_maps(struct cg *, struct cg*, int);
diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c
--- a/sbin/fsck_ffs/fsutil.c
+++ b/sbin/fsck_ffs/fsutil.c
@@ -71,7 +71,6 @@
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);
-static void prtbuf(const char *, struct bufarea *);
static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
static struct timespec startpass, finishpass;
@@ -79,6 +78,7 @@
int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk; /* backup buffer for cylinder group blocks */
+static struct bufarea failedbuf; /* returned by failed getdatablk() */
static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
static int numbufs; /* size of buffer cache */
@@ -187,6 +187,9 @@
{
int i;
+ initbarea(&failedbuf, BT_UNKNOWN);
+ failedbuf.b_errs = -1;
+ failedbuf.b_un.b_buf = NULL;
if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL)
errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
initbarea(&cgblk, BT_CYLGRP);
@@ -300,7 +303,7 @@
}
/*
- * Manage a cache of directory blocks.
+ * Manage a cache of filesystem disk blocks.
*/
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
@@ -309,16 +312,19 @@
struct bufhash *bhdp;
cachelookups++;
- /* If out of range, return empty buffer with b_err == -1 */
- if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
- blkno = -1;
- type = BT_EMPTY;
- }
+ /*
+ * If out of range, return empty buffer with b_errs == -1
+ *
+ * Skip check for inodes because chkrange() considers
+ * metadata areas invalid to write data.
+ */
+ if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize))
+ return (&failedbuf);
bhdp = &bufhashhd[HASH(blkno)];
LIST_FOREACH(bp, bhdp, b_hash)
if (bp->b_bno == fsbtodb(&sblock, blkno)) {
if (debug && bp->b_size != size) {
- prtbuf("getdatablk: size mismatch", bp);
+ prtbuf(bp, "getdatablk: size mismatch");
pfatal("getdatablk: b_size %d != size %ld\n",
bp->b_size, size);
}
@@ -378,7 +384,7 @@
if (debug && bp->b_type != type) {
printf("getdatablk: buffer type changed to %s",
BT_BUFTYPE(type));
- prtbuf("", bp);
+ prtbuf(bp, "");
}
TAILQ_REMOVE(&bufqueuehd, bp, b_list);
TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
@@ -401,11 +407,7 @@
readcnt[bp->b_type]++;
clock_gettime(CLOCK_REALTIME_PRECISE, &start);
}
- if (bp->b_type != BT_EMPTY)
- bp->b_errs =
- blread(fsreadfd, bp->b_un.b_buf, dblk, size);
- else
- bp->b_errs = -1;
+ bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
if (debug) {
clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
timespecsub(&finish, &start, &finish);
@@ -422,7 +424,7 @@
{
if (bp->b_refcnt <= 0)
- prtbuf("brelse: buffer with negative reference count", bp);
+ prtbuf(bp, "brelse: buffer with negative reference count");
bp->b_refcnt--;
}
@@ -451,10 +453,18 @@
if (bp != &sblk)
pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
bp, &sblk);
+ /*
+ * Superblocks are always pre-copied so we do not need
+ * to check them for copy-on-write.
+ */
if (sbput(fd, bp->b_un.b_fs, 0) == 0)
fsmodified = 1;
break;
case BT_CYLGRP:
+ /*
+ * Cylinder groups are always pre-copied so we do not
+ * need to check them for copy-on-write.
+ */
if (sujrecovery)
cg_write(bp);
if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
@@ -483,11 +493,38 @@
}
/* FALLTHROUGH */
default:
+ copyonwrite(&sblock, bp);
blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
break;
}
}
+/*
+ * If there are any snapshots, ensure that all the blocks that they
+ * care about have been copied, then release the snapshot inodes.
+ * These operations need to be done before we rebuild the cylinder
+ * groups so that any block allocations are properly recorded.
+ * Since all the cylinder group maps have already been copied in
+ * the snapshots, no further snapshot copies will need to be done.
+ */
+void
+snapflush(void)
+{
+	struct bufarea *dirtybp;
+	int idx;
+
+	/* Without active snapshots there is nothing to copy or release. */
+	if (snapcnt <= 0)
+		return;
+	if (debug)
+		printf("Check for snapshot copies\n");
+	/*
+	 * Walk the buffer cache list from tail to head and offer every
+	 * dirty buffer to copyonwrite().
+	 */
+	TAILQ_FOREACH_REVERSE(dirtybp, &bufqueuehd, bufqueue, b_list) {
+		if ((dirtybp->b_flags & B_DIRTY) == 0)
+			continue;
+		copyonwrite(&sblock, dirtybp);
+	}
+	/* Release each snapshot inode, then mark the list empty. */
+	for (idx = 0; idx < snapcnt; idx++)
+		irelse(&snaplist[idx]);
+	snapcnt = 0;
+}
+
/*
* Journaled soft updates does not maintain cylinder group summary
* information during cleanup, so this routine recalculates the summary
@@ -503,6 +540,7 @@
int blk;
int i;
+ snapflush();
/*
* Fix the frag and cluster summary.
*/
@@ -587,6 +625,7 @@
(void)close(fsreadfd);
return;
}
+
/*
* To remain idempotent with partial truncations the buffers
* must be flushed in this order:
@@ -629,14 +668,9 @@
case BT_SUPERBLK:
case BT_CYLGRP:
default:
- prtbuf("ckfini: improper buffer type on cache list",bp);
+ prtbuf(bp,"ckfini: improper buffer type on cache list");
continue;
/* These are the ones to flush in this step */
- case BT_EMPTY:
- if (bp->b_bno >= 0)
- pfatal("Unused BT_EMPTY buffer for block %jd\n",
- (intmax_t)bp->b_bno);
- /* FALLTHROUGH */
case BT_LEVEL1:
case BT_LEVEL2:
case BT_LEVEL3:
@@ -649,10 +683,11 @@
continue;
}
if (debug && bp->b_refcnt != 0) {
- prtbuf("ckfini: clearing in-use buffer", bp);
+ prtbuf(bp, "ckfini: clearing in-use buffer");
pfatal("ckfini: clearing in-use buffer\n");
}
TAILQ_REMOVE(&bufqueuehd, bp, b_list);
+ LIST_REMOVE(bp, b_hash);
cnt++;
flush(fswritefd, bp);
free(bp->b_un.b_buf);
@@ -667,10 +702,11 @@
}
TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
if (debug && bp->b_refcnt != 0) {
- prtbuf("ckfini: clearing in-use buffer", bp);
+ prtbuf(bp, "ckfini: clearing in-use buffer");
pfatal("ckfini: clearing in-use buffer\n");
}
TAILQ_REMOVE(&bufqueuehd, bp, b_list);
+ LIST_REMOVE(bp, b_hash);
cnt++;
flush(fswritefd, bp);
free(bp->b_un.b_buf);
@@ -1050,45 +1086,73 @@
* allocate a data block with the specified number of fragments
*/
ufs2_daddr_t
-allocblk(long frags)
+allocblk(long startcg, long frags,
+ ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
- int i, j, k, cg, baseblk;
- struct bufarea *cgbp;
- struct cg *cgp;
+ ufs2_daddr_t blkno, newblk;
if (frags <= 0 || frags > sblock.fs_frag)
return (0);
- for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) {
- for (j = 0; j <= sblock.fs_frag - frags; j++) {
- if (testbmap(i + j))
- continue;
- for (k = 1; k < frags; k++)
- if (testbmap(i + j + k))
- break;
- if (k < frags) {
- j += k;
- continue;
- }
- cg = dtog(&sblock, i + j);
- cgbp = cglookup(cg);
- cgp = cgbp->b_un.b_cg;
- if (!check_cgmagic(cg, cgbp, 0)) {
- i = (cg + 1) * sblock.fs_fpg - sblock.fs_frag;
- continue;
- }
- baseblk = dtogd(&sblock, i + j);
- for (k = 0; k < frags; k++) {
- setbmap(i + j + k);
- clrbit(cg_blksfree(cgp), baseblk + k);
- }
- n_blks += frags;
- if (frags == sblock.fs_frag)
- cgp->cg_cs.cs_nbfree--;
- else
- cgp->cg_cs.cs_nffree -= frags;
- cgdirty(cgbp);
- return (i + j);
+ for (blkno = cgdata(&sblock, startcg);
+ blkno < maxfsblock - sblock.fs_frag;
+ blkno += sblock.fs_frag) {
+ if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
+ continue;
+ if (newblk > 0)
+ return (newblk);
+ if (newblk < 0)
+ blkno = -newblk;
+ }
+ for (blkno = cgdata(&sblock, 0);
+ blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
+ blkno += sblock.fs_frag) {
+ if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
+ continue;
+ if (newblk > 0)
+ return (newblk);
+ if (newblk < 0)
+ blkno = -newblk;
+ }
+ return (0);
+}
+
+ufs2_daddr_t
+checkblkavail(blkno, frags)
+ ufs2_daddr_t blkno;
+ long frags;
+{
+ struct bufarea *cgbp;
+ struct cg *cgp;
+ ufs2_daddr_t j, k, baseblk;
+ long cg;
+
+ for (j = 0; j <= sblock.fs_frag - frags; j++) {
+ if (testbmap(blkno + j))
+ continue;
+ for (k = 1; k < frags; k++)
+ if (testbmap(blkno + j + k))
+ break;
+ if (k < frags) {
+ j += k;
+ continue;
+ }
+ cg = dtog(&sblock, blkno + j);
+ cgbp = cglookup(cg);
+ cgp = cgbp->b_un.b_cg;
+ if (!check_cgmagic(cg, cgbp, 0))
+ return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
+ baseblk = dtogd(&sblock, blkno + j);
+ for (k = 0; k < frags; k++) {
+ setbmap(blkno + j + k);
+ clrbit(cg_blksfree(cgp), baseblk + k);
}
+ n_blks += frags;
+ if (frags == sblock.fs_frag)
+ cgp->cg_cs.cs_nbfree--;
+ else
+ cgp->cg_cs.cs_nffree -= frags;
+ cgdirty(cgbp);
+ return (blkno + j);
}
return (0);
}
@@ -1261,14 +1325,19 @@
/*
* Print details about a buffer.
*/
-static void
-prtbuf(const char *msg, struct bufarea *bp)
+void
+prtbuf(struct bufarea *bp, const char *fmt, ...)
{
-
- printf("%s: bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
- "index %jd\n", msg, bp, BT_BUFTYPE(bp->b_type),
- (intmax_t) bp->b_bno, bp->b_size, bp->b_refcnt,
- bp->b_flags & B_DIRTY ? "dirty" : "clean", (intmax_t) bp->b_index);
+ va_list ap;
+ va_start(ap, fmt);
+ if (preen)
+ (void)fprintf(stdout, "%s: ", cdevname);
+ (void)vfprintf(stdout, fmt, ap);
+ va_end(ap);
+ printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
+ "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
+ bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
+ (intmax_t) bp->b_index);
}
/*
diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c
--- a/sbin/fsck_ffs/inode.c
+++ b/sbin/fsck_ffs/inode.c
@@ -38,6 +38,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/stat.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
@@ -58,6 +59,8 @@
static int iblock(struct inodesc *, off_t isize, int type);
static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
struct bufarea **);
+static int snapclean(struct inodesc *idesc);
+static void chkcopyonwrite(struct fs *, ufs2_daddr_t);
int
ckinode(union dinode *dp, struct inodesc *idesc)
@@ -378,8 +381,12 @@
int c;
if (cnt <= 0 || blk <= 0 || blk > maxfsblock ||
- cnt - 1 > maxfsblock - blk)
+ cnt - 1 > maxfsblock - blk) {
+ if (debug)
+ printf("out of range: blk %ld, offset %i, size %d\n",
+ (long)blk, (int)fragnum(&sblock, blk), cnt);
return (1);
+ }
if (cnt > sblock.fs_frag ||
fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
if (debug)
@@ -651,10 +658,17 @@
{
struct dups *dlp;
ufs2_daddr_t blkno;
- long nfrags, res;
+ long size, nfrags, res;
res = KEEPON;
blkno = idesc->id_blkno;
+ if (idesc->id_type == SNAP) {
+ pfatal("clearing a snapshot dinode\n");
+ return (STOP);
+ }
+ size = lfragtosize(&sblock, idesc->id_numfrags);
+ if (snapblkfree(&sblock, blkno, size, idesc->id_number, checkblkavail))
+ return (res);
for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
if (chkrange(blkno, 1)) {
res = SKIP;
@@ -677,9 +691,323 @@
return (res);
}
+/*
+ * Prepare a snapshot file for being removed.
+ */
+void
+snapremove(ino_t inum)
+{
+	struct inodesc idesc;
+	struct inode ip;
+	int i;
+
+	/* Locate inum in the list of active snapshots. */
+	for (i = 0; i < snapcnt; i++)
+		if (snaplist[i].i_number == inum)
+			break;
+	/*
+	 * snaplist[] has FSMAXSNAP + 1 entries, so this copy stays in
+	 * bounds even when the search failed (i == snapcnt); the copy is
+	 * only examined below when the search succeeded.
+	 */
+	ip = snaplist[i];
+	if (i == snapcnt || (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
+		printf("snapremove: inode %jd is not a snapshot\n",
+		    (intmax_t)inum);
+		return;
+	}
+	/*
+	 * Remove from active snapshot list.
+	 */
+	for (i++; i < FSMAXSNAP; i++) {
+		if (sblock.fs_snapinum[i] == 0)
+			break;
+		snaplist[i - 1] = snaplist[i];
+		sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
+	}
+	sblock.fs_snapinum[i - 1] = 0;
+	bzero(&snaplist[i - 1], sizeof(struct inode));
+	snapcnt--;
+	/*
+	 * Start from a zeroed descriptor so that ckinode() never sees
+	 * indeterminate values in the fields that are not set here.
+	 */
+	bzero(&idesc, sizeof(idesc));
+	idesc.id_type = SNAP;
+	idesc.id_func = snapclean;
+	idesc.id_number = inum;
+	/* snapclean() is invoked for the blocks ckinode() walks. */
+	(void)ckinode(ip.i_dp, &idesc);
+	/* The inode is no longer a snapshot; clear the flag and write back. */
+	DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
+	inodirty(&ip);
+	irelse(&ip);
+}
+
+static int
+snapclean(struct inodesc *idesc)
+{
+	ufs2_daddr_t curblk, selfblk;
+	struct bufarea *ptrbp;
+	union dinode *snapdp;
+
+	curblk = idesc->id_blkno;
+	if (curblk == 0)
+		return (KEEPON);
+
+	ptrbp = idesc->id_bp;
+	snapdp = idesc->id_dp;
+	/* Block number this entry would hold if it equalled its own lbn. */
+	selfblk = blkstofrags(&sblock, idesc->id_lbn);
+	/*
+	 * Leave the entry alone unless it is a BLK_NOCOPY or BLK_SNAP
+	 * marker, or it equals its own lbn and snapblkfree() reports that
+	 * another snapshot took the block.  snapblkfree() is only called
+	 * when the first three tests have not already decided the outcome.
+	 */
+	if (curblk != BLK_NOCOPY && curblk != BLK_SNAP &&
+	    (curblk != selfblk ||
+	    !snapblkfree(&sblock, curblk, sblock.fs_bsize, idesc->id_number,
+	    checkblkavail)))
+		return (KEEPON);
+
+	/* Zero the pointer, in the dinode or in the indirect block. */
+	if (idesc->id_lbn < UFS_NDADDR)
+		DIP_SET(snapdp, di_db[idesc->id_lbn], 0);
+	else
+		IBLK_SET(ptrbp, ptrbp->b_index, 0);
+	/* Only a block equal to its own lbn was counted in di_blocks. */
+	if (curblk == selfblk)
+		DIP_SET(snapdp, di_blocks, DIP(snapdp, di_blocks) -
+		    btodb(sblock.fs_bsize));
+	dirty(ptrbp);
+	return (KEEPON);
+}
+
+/*
+ * Notification that a block is being freed. Return zero if the free
+ * should be allowed to proceed. Return non-zero if the snapshot file
+ * wants to claim the block. The block will be claimed if it is an
+ * uncopied part of one of the snapshots. It will be freed if it is
+ * either a BLK_NOCOPY or has already been copied in all of the snapshots.
+ * If a fragment is being freed, then all snapshots that care about
+ * it must make a copy since a snapshot file can only claim full sized
+ * blocks. Note that if more than one snapshot file maps the block,
+ * we can pick one at random to claim it. Since none of the snapshots
+ * can change, we are assured that they will all see the same unmodified
+ * image. When deleting a snapshot file (see ino_trunc above), we
+ * must push any of these claimed blocks to one of the other snapshots
+ * that maps it. These claimed blocks are easily identified as they will
+ * have a block number equal to their logical block number within the
+ * snapshot. A copied block can never have this property because they
+ * must always have been allocated from a BLK_NOCOPY location.
+ */
+int
+snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
+	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
+{
+	union dinode *dp;
+	struct inode ip;
+	struct bufarea *snapbp;
+	ufs_lbn_t lbn;
+	ufs2_daddr_t blkno, relblkno;
+	int i, frags, claimedblk, copydone;
+
+	/*
+	 * fs            - filesystem superblock
+	 * bno, size     - first fragment and byte length being freed
+	 * inum          - inode the block is freed from; 0 means use each
+	 *                 snapshot's own inode number for the lookup
+	 * checkblkavail - availability callback handed on to allocblk()
+	 * Returns 1 when a snapshot claimed the block, otherwise 0.
+	 */
+
+	/* If no snapshots, nothing to do */
+	if (snapcnt == 0)
+		return (0);
+	if (debug)
+		printf("snapblkfree: in ino %ju free blkno %jd, size %ld\n",
+		    (uintmax_t)inum, (intmax_t)bno, size);
+	relblkno = blknum(fs, bno);
+	lbn = fragstoblks(fs, relblkno);
+	/* Direct blocks are always pre-copied */
+	if (lbn < UFS_NDADDR)
+		return (0);
+	copydone = 0;
+	claimedblk = 0;
+	for (i = 0; i < snapcnt; i++) {
+		/*
+		 * Lookup block being freed.
+		 */
+		ip = snaplist[i];
+		dp = ip.i_dp;
+		blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
+		    lbn, &frags, &snapbp);
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		if (blkno == 0) {
+			/*
+			 * A block that we map is being freed. If it has not
+			 * been claimed yet, we will claim or copy it (below).
+			 */
+			claimedblk = 1;
+		} else if (blkno == BLK_SNAP) {
+			/*
+			 * No previous snapshot claimed the block,
+			 * so it will be freed and become a BLK_NOCOPY
+			 * (don't care) for us.
+			 */
+			if (claimedblk)
+				pfatal("snapblkfree: inconsistent block type");
+			IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
+			dirty(snapbp);
+			brelse(snapbp);
+			continue;
+		} else /* BLK_NOCOPY or default */ {
+			/*
+			 * If the snapshot has already copied the block
+			 * (default), or does not care about the block,
+			 * it is not needed.
+			 */
+			brelse(snapbp);
+			continue;
+		}
+		/*
+		 * If this is a full size block, we will just grab it
+		 * and assign it to the snapshot inode. Otherwise we
+		 * will proceed to copy it. See explanation for this
+		 * routine as to why only a single snapshot needs to
+		 * claim this block.
+		 */
+		if (size == fs->fs_bsize) {
+			if (debug)
+				printf("Grabonremove snapshot %ju lbn %jd "
+				    "from inum %ju\n", (uintmax_t)ip.i_number,
+				    (intmax_t)lbn, (uintmax_t)inum);
+			IBLK_SET(snapbp, snapbp->b_index, relblkno);
+			dirty(snapbp);
+			brelse(snapbp);
+			DIP_SET(dp, di_blocks,
+			    DIP(dp, di_blocks) + btodb(size));
+			inodirty(&ip);
+			return (1);
+		}
+
+		/* First time through, read the contents of the old block. */
+		if (copydone == 0) {
+			copydone = 1;
+			if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
+			    fs->fs_bsize) != 0) {
+				pfatal("Could not read snapshot %ju block "
+				    "%jd\n", (uintmax_t)ip.i_number,
+				    (intmax_t)relblkno);
+				/*
+				 * NOTE(review): snapbp is not released on
+				 * this path - confirm that is intended.
+				 */
+				continue;
+			}
+		}
+		/*
+		 * This allocation will never require any additional
+		 * allocations for the snapshot inode.
+		 */
+		blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
+		    checkblkavail);
+		if (blkno == 0) {
+			pfatal("Could not allocate block for snapshot %ju\n",
+			    (uintmax_t)ip.i_number);
+			/*
+			 * NOTE(review): snapbp is not released on this
+			 * path either - confirm that is intended.
+			 */
+			continue;
+		}
+		if (debug)
+			printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
+			    "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
+			    (intmax_t)lbn, (uintmax_t)inum, size,
+			    (intmax_t)blkno);
+		/* Write the saved contents and point the snapshot at the copy. */
+		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
+		IBLK_SET(snapbp, snapbp->b_index, blkno);
+		dirty(snapbp);
+		brelse(snapbp);
+		DIP_SET(dp, di_blocks,
+		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
+		inodirty(&ip);
+	}
+	return (0);
+}
+
+/*
+ * Notification that a block is being written. Return if the block
+ * is part of a snapshot as snapshots never track other snapshots.
+ * The block will be copied in all of the snapshots that are tracking
+ * it and have not yet copied it. Some buffers may hold more than one
+ * block. Here we need to check each block in the buffer.
+ */
+void
+copyonwrite(struct fs *fs, struct bufarea *bp)
+{
+	ufs2_daddr_t copyblkno;
+	long i, numblks;
+
+	/* If no snapshots, nothing to do. */
+	if (snapcnt == 0)
+		return;
+	/* Number of full-size blocks spanned by the buffer, rounded up. */
+	numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
+	if (debug)
+		prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
+		    (intmax_t)numblks, numblks > 1 ? "s" : "");
+	/* Block-aligned filesystem address of the first block in bp. */
+	copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
+	for (i = 0; i < numblks; i++) {
+		chkcopyonwrite(fs, copyblkno);
+		copyblkno += fs->fs_frag;
+	}
+}
+
+/*
+ * Check one full-size block that is about to be written.  For every
+ * active snapshot whose entry for the block is still zero, allocate a
+ * new block, write the old on-disk contents to it and record it in
+ * the snapshot.
+ */
+static void
+chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno)
+{
+	struct inode ip;
+	union dinode *dp;
+	struct bufarea *snapbp;
+	ufs2_daddr_t blkno;
+	int i, frags, copydone;
+	ufs_lbn_t lbn;
+
+	lbn = fragstoblks(fs, copyblkno);
+	/* Direct blocks are always pre-copied */
+	if (lbn < UFS_NDADDR)
+		return;
+	copydone = 0;
+	for (i = 0; i < snapcnt; i++) {
+		/*
+		 * Look up the block being written.
+		 */
+		ip = snaplist[i];
+		dp = ip.i_dp;
+		blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		if (blkno != 0) {
+			/*
+			 * A block that we have already copied or don't track.
+			 */
+			brelse(snapbp);
+			continue;
+		}
+		/* First time through, read the contents of the old block. */
+		if (copydone == 0) {
+			copydone = 1;
+			if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
+			    fs->fs_bsize) != 0) {
+				pfatal("Could not read snapshot %ju block "
+				    "%jd\n", (uintmax_t)ip.i_number,
+				    (intmax_t)copyblkno);
+				/*
+				 * NOTE(review): snapbp is not released on
+				 * this path - confirm that is intended.
+				 */
+				continue;
+			}
+		}
+		/*
+		 * This allocation will never require any additional
+		 * allocations for the snapshot inode.
+		 */
+		if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
+		    checkblkavail)) == 0) {
+			pfatal("Could not allocate block for snapshot %ju\n",
+			    (uintmax_t)ip.i_number);
+			/*
+			 * NOTE(review): snapbp is not released on this
+			 * path either - confirm that is intended.
+			 */
+			continue;
+		}
+		if (debug)
+			prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
+			    "blkno %ju setting in buffer",
+			    (intmax_t)ip.i_number, (intmax_t)lbn,
+			    (uintmax_t)blkno);
+		/* Write the saved contents and point the snapshot at the copy. */
+		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
+		IBLK_SET(snapbp, snapbp->b_index, blkno);
+		dirty(snapbp);
+		brelse(snapbp);
+		DIP_SET(dp, di_blocks,
+		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
+		inodirty(&ip);
+	}
+}
+
void
freeinodebuf(void)
{
+ struct bufarea *bp;
+ int i;
/*
* Flush old contents in case they have been updated.
@@ -689,6 +1017,14 @@
free((char *)inobuf.b_un.b_buf);
inobuf.b_un.b_buf = NULL;
firstinum = lastinum = 0;
+ /*
+ * Reload the snapshot inodes in case any of them changed.
+ */
+ for (i = 0; i < snapcnt; i++) {
+ bp = snaplist[i].i_bp;
+ bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
+ bp->b_size);
+ }
}
/*
@@ -803,6 +1139,10 @@
printf(" (CLEARED)\n");
n_files--;
if (bkgrdflag == 0) {
+ if (idesc->id_type == SNAP) {
+ snapremove(idesc->id_number);
+ idesc->id_type = ADDR;
+ }
(void)ckinode(dp, idesc);
inoinfo(idesc->id_number)->ino_state = USTATE;
clearinode(dp);
@@ -967,7 +1307,8 @@
cgdirty(cgbp);
ginode(ino, &ip);
dp = ip.i_dp;
- DIP_SET(dp, di_db[0], allocblk((long)1));
+ DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
+ checkblkavail));
if (DIP(dp, di_db[0]) == 0) {
inoinfo(ino)->ino_state = USTATE;
irelse(&ip);
diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c
--- a/sbin/fsck_ffs/main.c
+++ b/sbin/fsck_ffs/main.c
@@ -491,6 +491,7 @@
*/
if (preen == 0)
printf("** Phase 5 - Check Cyl groups\n");
+ snapflush();
pass5();
IOstats("Pass5");
diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c
--- a/sbin/fsck_ffs/setup.c
+++ b/sbin/fsck_ffs/setup.c
@@ -59,6 +59,9 @@
#include "fsck.h"
struct inoinfo **inphead, **inpsort; /* info about all inodes */
+struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */
+int snapcnt; /* number of active snapshots */
+char *copybuf; /* buffer to copy snapshot blocks */
static int sbhashfailed;
#define POWEROF2(num) (((num) & ((num) - 1)) == 0)
@@ -66,6 +69,8 @@
static int calcsb(char *dev, int devfd, struct fs *fs);
static void saverecovery(int readfd, int writefd);
static int chkrecovery(int devfd);
+static int getlbnblkno(struct inodesc *);
+static int checksnapinfo(struct inode *);
/*
* Read in a superblock finding an alternate if necessary.
@@ -75,7 +80,8 @@
int
setup(char *dev)
{
- long bmapsize;
+ long i, bmapsize;
+ struct inode ip;
/*
* We are expected to have an open file descriptor and a superblock.
@@ -174,6 +180,39 @@
usedsoftdep = 1;
else
usedsoftdep = 0;
+ /*
+ * Collect any snapshot inodes so that we can allow them to
+ * claim any blocks that we free. The code for doing this is
+ * imported here and into inode.c from sys/ufs/ffs/ffs_snapshot.c.
+ */
+ for (snapcnt = 0; snapcnt < FSMAXSNAP; snapcnt++) {
+ if (sblock.fs_snapinum[snapcnt] == 0)
+ break;
+ ginode(sblock.fs_snapinum[snapcnt], &ip);
+ if ((DIP(ip.i_dp, di_mode) & IFMT) == IFREG &&
+ (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) != 0 &&
+ checksnapinfo(&ip)) {
+ snaplist[snapcnt] = ip;
+ continue;
+ }
+ printf("Removing non-snapshot inode %ju from snapshot list\n",
+ (uintmax_t)sblock.fs_snapinum[snapcnt]);
+ irelse(&ip);
+ for (i = snapcnt + 1; i < FSMAXSNAP; i++) {
+ if (sblock.fs_snapinum[i] == 0)
+ break;
+ sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
+ }
+ sblock.fs_snapinum[i - 1] = 0;
+ snapcnt--;
+ sbdirty();
+ }
+ if (snapcnt > 0 && copybuf == NULL) {
+ copybuf = Malloc(sblock.fs_bsize);
+ if (copybuf == NULL)
+ errx(EEXIT, "cannot allocate space for snapshot "
+ "copy buffer");
+ }
return (1);
badsb:
@@ -181,6 +220,100 @@
return (0);
}
+/*
+ * Check for valid snapshot information.
+ */
+/*
+ * Verify that block chkblk appears in the sorted snapshot block list.
+ * Only blocks at or beyond UFS_NDADDR are looked up.  Relies on the
+ * caller's locals blkp, lastblkp, snapblklist, snapip and status, and
+ * on a "fail" label to jump to when the check fails.
+ */
+#define CHKBLKINLIST(chkblk) do {					\
+	if ((chkblk) >= UFS_NDADDR) {					\
+		/* Test the bound first so *blkp is never read */	\
+		/* at or beyond lastblkp. */				\
+		while (blkp < lastblkp && *blkp < (chkblk))		\
+			blkp++;						\
+		if (blkp >= lastblkp) {					\
+			pwarn("UFS%d snapshot inode %jd failed: "	\
+			    "improper block list length (%jd)\n",	\
+			    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2,	\
+			    (intmax_t)snapip->i_number,			\
+			    (intmax_t)(lastblkp - &snapblklist[0]));	\
+			status = 0;					\
+			goto fail;					\
+		}							\
+		if (*blkp++ != (chkblk)) {				\
+			pwarn("UFS%d snapshot inode %jd failed: "	\
+			    "block list (%jd) != %s (%jd)\n",		\
+			    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2,	\
+			    (intmax_t)snapip->i_number,			\
+			    (intmax_t)blkp[-1], #chkblk,		\
+			    (intmax_t)(chkblk));			\
+			status = 0;					\
+			goto fail;					\
+		}							\
+	}								\
+} while (0)
+
+/*
+ * Validate the block list stored at the end of a snapshot file: the
+ * superblock, every cylinder group and the summary area must be listed.
+ * Returns 1 when the list checks out, otherwise 0.
+ */
+static int
+checksnapinfo(struct inode *snapip)
+{
+	struct fs *fs;
+	struct bufarea *bp;
+	struct inodesc idesc;
+	daddr_t *snapblklist, *blkp, *lastblkp, csblkno;
+	int cg, loc, len, status;
+	ufs_lbn_t lbn;
+	size_t size;
+
+	fs = &sblock;
+	memset(&idesc, 0, sizeof(struct inodesc));
+	idesc.id_type = ADDR;
+	idesc.id_func = getlbnblkno;
+	idesc.id_number = snapip->i_number;
+	/* The block list starts at the first lbn past the filesystem. */
+	lbn = howmany(fs->fs_size, fs->fs_frag);
+	idesc.id_parent = lbn; /* sought after blkno */
+	if ((ckinode(snapip->i_dp, &idesc) & FOUND) == 0)
+		return (0);
+	size = fragroundup(fs,
+	    DIP(snapip->i_dp, di_size) - lblktosize(fs, lbn));
+	bp = getdatablk(idesc.id_parent, size, BT_DATA);
+	/*
+	 * A failed lookup can hand back a buffer with no data attached,
+	 * so never dereference b_buf when b_errs is set.  Only release
+	 * the buffer if a reference is actually held on it.
+	 */
+	if (bp->b_errs != 0) {
+		if (bp->b_refcnt > 0)
+			brelse(bp);
+		return (0);
+	}
+	snapblklist = (daddr_t *)bp->b_un.b_buf;
+	/*
+	 * Check that superblock and all cylinder groups are listed.
+	 */
+	status = 1;
+	blkp = &snapblklist[1];
+	/* snapblklist[0] holds the list length; clamp it to the buffer. */
+	lastblkp = &snapblklist[MAX(0,
+	    MIN(snapblklist[0] + 1, size / sizeof(daddr_t)))];
+	if (debug)
+		printf("max chk %jd, list size %jd space size %ju\n",
+		    (intmax_t)(lastblkp - snapblklist),
+		    (intmax_t)snapblklist[0] + 1,
+		    (uintmax_t)(size / sizeof(daddr_t)));
+	CHKBLKINLIST(lblkno(fs, fs->fs_sblockloc));
+	csblkno = fragstoblks(fs, fs->fs_csaddr);
+	/* Cylinder groups that lie at or before the summary area. */
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		if (fragstoblks(fs, cgtod(fs, cg)) > csblkno)
+			break;
+		CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg)));
+	}
+	/* The blocks of the summary area itself. */
+	len = howmany(fs->fs_cssize, fs->fs_bsize);
+	for (loc = 0; loc < len; loc++)
+		CHKBLKINLIST(csblkno + loc);
+	/* The remaining cylinder groups. */
+	for (; cg < fs->fs_ncg; cg++)
+		CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg)));
+fail:
+	brelse(bp);
+	return (status);
+}
+
+/*
+ * Return the block number associated with a specified inode lbn.
+ * Requested lbn is in id_parent. If found, block is returned in
+ * id_parent.
+ */
+static int
+getlbnblkno(struct inodesc *idesc)
+{
+	/*
+	 * id_parent holds the lbn being sought.  Once the walk reaches
+	 * it, overwrite id_parent with that block's number and stop.
+	 */
+	if (idesc->id_lbn >= idesc->id_parent) {
+		idesc->id_parent = idesc->id_blkno;
+		return (STOP | FOUND);
+	}
+	/* Not there yet; keep walking. */
+	return (KEEPON);
+}
+
/*
* Open a device or file to be checked by fsck.
*/
diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c
--- a/sbin/fsck_ffs/suj.c
+++ b/sbin/fsck_ffs/suj.c
@@ -321,7 +321,7 @@
* To be certain we're not freeing a reallocated block we lookup
* this block in the blk hash and see if there is an allocation
* journal record that overlaps with any fragments in the block
- * we're concerned with. If any fragments have ben reallocated
+ * we're concerned with. If any fragments have been reallocated
* the block has already been freed and re-used for another purpose.
*/
mask = 0;
@@ -378,6 +378,50 @@
return (0);
}
+/*
+ * Check to see if the requested block is available.
+ * We can just check in the cylinder-group maps as
+ * they will only have usable blocks in them.
+ */
+static ufs2_daddr_t
+suj_checkblkavail(ufs2_daddr_t blkno, long frags)
+{
+	struct bufarea *cgbp;
+	struct cg *cgp;
+	ufs2_daddr_t j, k, baseblk;
+	long cg;
+
+	/*
+	 * Returns the allocated fragment address on success, 0 when no
+	 * run of "frags" free fragments exists in the block at blkno, or
+	 * a negative value when the cylinder group fails its magic check.
+	 */
+	cg = dtog(&sblock, blkno);
+	cgbp = cglookup(cg);
+	cgp = cgbp->b_un.b_cg;
+	if (!check_cgmagic(cg, cgbp, 0))
+		return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
+	/* Offset of blkno within its cylinder group. */
+	baseblk = dtogd(&sblock, blkno);
+	for (j = 0; j <= sblock.fs_frag - frags; j++) {
+		/* A set bit in the cg free map marks a free fragment. */
+		if (!isset(cg_blksfree(cgp), baseblk + j))
+			continue;
+		for (k = 1; k < frags; k++)
+			if (!isset(cg_blksfree(cgp), baseblk + j + k))
+				break;
+		if (k < frags) {
+			/* Run was interrupted at j + k; resume after it. */
+			j += k;
+			continue;
+		}
+		/* Claim the run and update the cg summary counts. */
+		for (k = 0; k < frags; k++)
+			clrbit(cg_blksfree(cgp), baseblk + j + k);
+		n_blks += frags;
+		if (frags == sblock.fs_frag)
+			cgp->cg_cs.cs_nbfree--;
+		else
+			cgp->cg_cs.cs_nffree -= frags;
+		cgdirty(cgbp);
+		return ((cg * sblock.fs_fpg) + baseblk + j);
+	}
+	return (0);
+}
+
/*
* Clear an inode from the cg bitmap. If the inode was already clear return
* 0 so the caller knows it does not have to check the inode contents.
@@ -431,6 +475,12 @@
if (debug)
printf("Freeing %d frags at blk %jd mask 0x%x\n",
frags, bno, mask);
+ /*
+ * Check to see if the block needs to be claimed by a snapshot.
+ * If wanted, the snapshot references it. Otherwise we free it.
+ */
+ if (snapblkfree(fs, bno, lfragtosize(fs, frags), 0, suj_checkblkavail))
+ return;
cg = dtog(fs, bno);
sc = cg_lookup(cg);
cgp = sc->sc_cgp;
@@ -1264,6 +1314,7 @@
if (size > 0)
err_suj("Partial truncation of ino %ju snapshot file\n",
(uintmax_t)ino);
+ snapremove(ino);
}
lastlbn = lblkno(fs, blkroundup(fs, size));
for (i = lastlbn; i < UFS_NDADDR; i++) {
@@ -1283,13 +1334,13 @@
/* If we're not freeing any in this indirect range skip it. */
if (lastlbn >= nextlbn)
continue;
- if (DIP(dp, di_ib[i]) == 0)
- continue;
- indir_trunc(ino, -lbn - i, DIP(dp, di_ib[i]), lastlbn, dp);
- /* If we freed everything in this indirect free the indir. */
- if (lastlbn > lbn)
- continue;
- blk_free(DIP(dp, di_ib[i]), 0, fs->fs_frag);
+ if ((bn = DIP(dp, di_ib[i])) == 0)
+ continue;
+ indir_trunc(ino, -lbn - i, bn, lastlbn, dp);
+ /* If we freed everything in this indirect free the indir. */
+ if (lastlbn > lbn)
+ continue;
+ blk_free(bn, 0, fs->fs_frag);
DIP_SET(dp, di_ib[i], 0);
}
/*
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Jan 12, 10:56 AM (56 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15768722
Default Alt Text
D36491.id111318.diff (34 KB)
Attached To
Mode
D36491: Add support for managing UFS/FFS snapshots to fsck_ffs(8)
Attached
Detach File
Event Timeline
Log In to Comment