Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102811634
D36491.id112753.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
44 KB
Referenced Files
None
Subscribers
None
D36491.id112753.diff
View Options
diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c
--- a/sbin/fsck_ffs/dir.c
+++ b/sbin/fsck_ffs/dir.c
@@ -679,14 +679,17 @@
struct bufarea *bp, *nbp;
struct inodesc idesc;
union dinode *dp;
- int indiralloced;
+ long cg, indiralloced;
char *cp;
nbp = NULL;
indiralloced = newblk = indirblk = 0;
+ memset(&idesc, 0, sizeof(struct inodesc));
+ idesc.id_type = ADDR;
pwarn("NO SPACE LEFT IN %s", name);
if (!preen && reply("EXPAND") == 0)
return (0);
+ cg = ino_to_cg(&sblock, ip->i_number);
dp = ip->i_dp;
filesize = DIP(dp, di_size);
lastlbn = lblkno(&sblock, filesize);
@@ -705,7 +708,8 @@
bp = getdirblk(oldblk, lastlbnsize);
if (bp->b_errs)
goto bad;
- if ((newblk = allocblk(sblock.fs_frag)) == 0)
+ newblk = allocblk(cg, sblock.fs_frag, std_checkblkavail);
+ if (newblk == 0)
goto bad;
nbp = getdatablk(newblk, sblock.fs_bsize, BT_DIRDATA);
if (nbp->b_errs)
@@ -724,6 +728,7 @@
memmove(cp, &emptydir, sizeof emptydir);
dirty(nbp);
brelse(nbp);
+ binval(bp);
idesc.id_blkno = oldblk;
idesc.id_numfrags = numfrags(&sblock, lastlbnsize);
(void)freeblock(&idesc);
@@ -731,7 +736,7 @@
printf(" (EXPANDED)\n");
return (1);
}
- if ((newblk = allocblk(sblock.fs_frag)) == 0)
+ if ((newblk = allocblk(cg, sblock.fs_frag, std_checkblkavail)) == 0)
goto bad;
bp = getdirblk(newblk, sblock.fs_bsize);
if (bp->b_errs)
@@ -749,8 +754,12 @@
* Allocate indirect block if needed.
*/
if ((indirblk = DIP(dp, di_ib[0])) == 0) {
- if ((indirblk = allocblk(sblock.fs_frag)) == 0)
+ indirblk = allocblk(cg, sblock.fs_frag,
+ std_checkblkavail);
+ if (indirblk == 0) {
+ binval(bp);
goto bad;
+ }
indiralloced = 1;
}
nbp = getdatablk(indirblk, sblock.fs_bsize, BT_LEVEL1);
@@ -774,8 +783,10 @@
return (1);
bad:
pfatal(" (EXPANSION FAILED)\n");
- if (nbp != NULL)
+ if (nbp != NULL) {
+ binval(bp);
brelse(nbp);
+ }
if (newblk != 0) {
idesc.id_blkno = newblk;
idesc.id_numfrags = sblock.fs_frag;
diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h
--- a/sbin/fsck_ffs/fsck.h
+++ b/sbin/fsck_ffs/fsck.h
@@ -200,8 +200,7 @@
#define BT_INODES 7 /* Buffer holds inodes */
#define BT_DIRDATA 8 /* Buffer holds directory data */
#define BT_DATA 9 /* Buffer holds user data */
-#define BT_EMPTY 10 /* Buffer allocated but not filled */
-#define BT_NUMBUFTYPES 11
+#define BT_NUMBUFTYPES 10
#define BT_NAMES { \
"unknown", \
"Superblock", \
@@ -212,8 +211,7 @@
"External Attribute", \
"Inode Block", \
"Directory Contents", \
- "User Data", \
- "Allocated but not filled" }
+ "User Data" }
extern char *buftype[];
#define BT_BUFTYPE(type) \
type < BT_NUMBUFTYPES ? buftype[type] : buftype[BT_UNKNOWN]
@@ -234,7 +232,7 @@
(bp)->b_flags |= B_DIRTY; \
} while (0)
#define initbarea(bp, type) do { \
- (bp)->b_bno = (ufs2_daddr_t)-1; \
+ (bp)->b_bno = (ufs2_daddr_t)-4; \
(bp)->b_size = 0; \
(bp)->b_errs = 0; \
(bp)->b_flags = 0; \
@@ -347,6 +345,7 @@
extern char *cdevname; /* name of device being checked */
extern char ckclean; /* only do work if not cleanly unmounted */
extern int ckhashadd; /* check hashes to be added */
+extern char *copybuf; /* buffer to copy snapshot blocks */
extern int cvtlevel; /* convert to newer file system format */
extern long dev_bsize; /* computed value of DEV_BSIZE */
extern u_int real_dev_bsize; /* actual disk sector size, not overridden */
@@ -371,6 +370,8 @@
extern int returntosingle; /* 1 => return to single user mode on exit */
extern long secsize; /* actual disk sector size */
extern char skipclean; /* skip clean file systems if preening */
+extern int snapcnt; /* number of active snapshots */
+extern struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */
extern char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */
extern int sujrecovery; /* 1 => doing check using the journal */
extern int surrender; /* Give up if reads fail */
@@ -441,9 +442,11 @@
void adjust(struct inodesc *, int lcnt);
void alarmhandler(int sig);
-ufs2_daddr_t allocblk(long frags);
+ufs2_daddr_t allocblk(long cg, long frags, ufs2_daddr_t (*checkblkavail)
+ (ufs2_daddr_t blkno, long frags));
ino_t allocdir(ino_t parent, ino_t request, int mode);
ino_t allocino(ino_t request, int type);
+void binval(struct bufarea *);
void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk);
char *blockcheck(char *name);
int blread(int fd, char *buf, ufs2_daddr_t blk, long size);
@@ -458,12 +461,15 @@
void cgdirty(struct bufarea *);
struct bufarea *cglookup(int cg);
int changeino(ino_t dir, const char *name, ino_t newnum);
+void check_blkcnt(struct inode *ip);
int check_cgmagic(int cg, struct bufarea *cgbp, int requestrebuild);
int chkrange(ufs2_daddr_t blk, int cnt);
void ckfini(int markclean);
int ckinode(union dinode *dp, struct inodesc *);
void clri(struct inodesc *, const char *type, int flag);
int clearentry(struct inodesc *);
+void copyonwrite(struct fs *, struct bufarea *,
+ ufs2_daddr_t (*checkblkavail)(long, long));
void direrror(ino_t ino, const char *errmesg);
int dirscan(struct inodesc *);
int dofix(struct inodesc *, const char *msg);
@@ -476,6 +482,7 @@
int freeblock(struct inodesc *);
void freeino(ino_t ino);
void freeinodebuf(void);
+void fsckinit(void);
void fsutilinit(void);
int ftypeok(union dinode *dp);
void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size);
@@ -484,6 +491,7 @@
union dinode *getnextinode(ino_t inumber, int rebuildcg);
void getpathname(char *namebuf, ino_t curdir, ino_t ino);
void ginode(ino_t, struct inode *);
+void gjournal_check(const char *filesys);
void infohandler(int sig);
void irelse(struct inode *);
ufs2_daddr_t ino_blkatoff(union dinode *, ino_t, ufs_lbn_t, int *,
@@ -505,6 +513,7 @@
void pass5(void);
void pfatal(const char *fmt, ...) __printflike(1, 2);
void propagate(void);
+void prtbuf(struct bufarea *, const char *, ...) __printflike(2, 3);
void prtinode(struct inode *);
void pwarn(const char *fmt, ...) __printflike(1, 2);
int readsb(void);
@@ -513,9 +522,13 @@
void sblock_init(void);
void setinodebuf(int, ino_t);
int setup(char *dev);
-void gjournal_check(const char *filesys);
+int snapblkfree(struct fs *, ufs2_daddr_t, long, ino_t,
+ ufs2_daddr_t (*)(ufs2_daddr_t, long));
+void snapremove(ino_t);
+void snapflush(ufs2_daddr_t (*checkblkavail)(long, long));
+ufs2_daddr_t std_checkblkavail(ufs2_daddr_t blkno, long frags);
+ufs2_daddr_t suj_checkblkavail(ufs2_daddr_t, long);
int suj_check(const char *filesys);
void update_maps(struct cg *, struct cg*, int);
-void fsckinit(void);
#endif /* !_FSCK_H_ */
diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c
--- a/sbin/fsck_ffs/fsutil.c
+++ b/sbin/fsck_ffs/fsutil.c
@@ -71,7 +71,6 @@
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);
-static void prtbuf(const char *, struct bufarea *);
static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
static struct timespec startpass, finishpass;
@@ -79,8 +78,10 @@
int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk; /* backup buffer for cylinder group blocks */
+static struct bufarea failedbuf; /* returned by failed getdatablk() */
static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
+static struct bufhash freebufs; /* unused buffers */
static int numbufs; /* size of buffer cache */
static int cachelookups; /* number of cache lookups */
static int cachereads; /* number of cache reads */
@@ -187,11 +188,15 @@
{
int i;
+ initbarea(&failedbuf, BT_UNKNOWN);
+ failedbuf.b_errs = -1;
+ failedbuf.b_un.b_buf = NULL;
if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL)
errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
initbarea(&cgblk, BT_CYLGRP);
numbufs = cachelookups = cachereads = 0;
TAILQ_INIT(&bufqueuehd);
+ LIST_INIT(&freebufs);
for (i = 0; i < HASHSIZE; i++)
LIST_INIT(&bufhashhd[i]);
for (i = 0; i < BT_NUMBUFTYPES; i++) {
@@ -300,7 +305,7 @@
}
/*
- * Manage a cache of directory blocks.
+ * Manage a cache of filesystem disk blocks.
*/
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
@@ -309,19 +314,23 @@
struct bufhash *bhdp;
cachelookups++;
- /* If out of range, return empty buffer with b_err == -1 */
- if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
- blkno = -1;
- type = BT_EMPTY;
- }
+ /*
+ * If out of range, return empty buffer with b_err == -1
+ *
+ * Skip check for inodes because chkrange() considers
+ * metadata areas invalid to write data.
+ */
+ if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize))
+ return (&failedbuf);
bhdp = &bufhashhd[HASH(blkno)];
LIST_FOREACH(bp, bhdp, b_hash)
if (bp->b_bno == fsbtodb(&sblock, blkno)) {
if (debug && bp->b_size != size) {
- prtbuf("getdatablk: size mismatch", bp);
+ prtbuf(bp, "getdatablk: size mismatch");
pfatal("getdatablk: b_size %d != size %ld\n",
bp->b_size, size);
}
+ TAILQ_REMOVE(&bufqueuehd, bp, b_list);
goto foundit;
}
/*
@@ -340,7 +349,9 @@
if (size > sblock.fs_bsize)
errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
sblock.fs_bsize);
- if (numbufs < MINBUFS) {
+ if ((bp = LIST_FIRST(&freebufs)) != NULL) {
+ LIST_REMOVE(bp, b_hash);
+ } else if (numbufs < MINBUFS) {
bp = allocbuf("cannot create minimal buffer pool");
} else if (sujrecovery) {
/*
@@ -368,6 +379,7 @@
else
LIST_REMOVE(bp, b_hash);
}
+ TAILQ_REMOVE(&bufqueuehd, bp, b_list);
flush(fswritefd, bp);
bp->b_type = type;
LIST_INSERT_HEAD(bhdp, bp, b_hash);
@@ -375,13 +387,12 @@
cachereads++;
/* fall through */
foundit:
+ TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
if (debug && bp->b_type != type) {
printf("getdatablk: buffer type changed to %s",
BT_BUFTYPE(type));
- prtbuf("", bp);
+ prtbuf(bp, "");
}
- TAILQ_REMOVE(&bufqueuehd, bp, b_list);
- TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
if (bp->b_errs == 0)
bp->b_refcnt++;
return (bp);
@@ -401,11 +412,7 @@
readcnt[bp->b_type]++;
clock_gettime(CLOCK_REALTIME_PRECISE, &start);
}
- if (bp->b_type != BT_EMPTY)
- bp->b_errs =
- blread(fsreadfd, bp->b_un.b_buf, dblk, size);
- else
- bp->b_errs = -1;
+ bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
if (debug) {
clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
timespecsub(&finish, &start, &finish);
@@ -422,10 +429,19 @@
{
if (bp->b_refcnt <= 0)
- prtbuf("brelse: buffer with negative reference count", bp);
+ prtbuf(bp, "brelse: buffer with negative reference count");
bp->b_refcnt--;
}
+void
+binval(struct bufarea *bp)
+{
+
+ bp->b_flags &= ~B_DIRTY;
+ LIST_REMOVE(bp, b_hash);
+ LIST_INSERT_HEAD(&freebufs, bp, b_hash);
+}
+
void
flush(int fd, struct bufarea *bp)
{
@@ -451,10 +467,18 @@
if (bp != &sblk)
pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
bp, &sblk);
+ /*
+ * Superblocks are always pre-copied so we do not need
+ * to check them for copy-on-write.
+ */
if (sbput(fd, bp->b_un.b_fs, 0) == 0)
fsmodified = 1;
break;
case BT_CYLGRP:
+ /*
+ * Cylinder groups are always pre-copied so we do not
+ * need to check them for copy-on-write.
+ */
if (sujrecovery)
cg_write(bp);
if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
@@ -483,11 +507,38 @@
}
/* FALLTHROUGH */
default:
+ copyonwrite(&sblock, bp, std_checkblkavail);
blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
break;
}
}
+/*
+ * If there are any snapshots, ensure that all the blocks that they
+ * care about have been copied, then release the snapshot inodes.
+ * These operations need to be done before we rebuild the cylinder
+ * groups so that any block allocations are properly recorded.
+ * Since all the cylinder group maps have already been copied in
+ * the snapshots, no further snapshot copies will need to be done.
+ */
+void
+snapflush(ufs2_daddr_t (*checkblkavail)(long, long))
+{
+ struct bufarea *bp;
+ int cnt;
+
+ if (snapcnt > 0) {
+ if (debug)
+ printf("Check for snapshot copies\n");
+ TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
+ if ((bp->b_flags & B_DIRTY) != 0)
+ copyonwrite(&sblock, bp, checkblkavail);
+ for (cnt = 0; cnt < snapcnt; cnt++)
+ irelse(&snaplist[cnt]);
+ snapcnt = 0;
+ }
+}
+
/*
* Journaled soft updates does not maintain cylinder group summary
* information during cleanup, so this routine recalculates the summary
@@ -499,6 +550,7 @@
{
ufs1_daddr_t fragno, cgbno, maxbno;
u_int8_t *blksfree;
+ struct csum *csp;
struct cg *cgp;
int blk;
int i;
@@ -536,6 +588,11 @@
* Update the superblock cg summary from our now correct values
* before writing the block.
*/
+ csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
+ sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
+ sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
+ sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
+ sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
}
@@ -587,6 +644,7 @@
(void)close(fsreadfd);
return;
}
+
/*
* To remain idempotent with partial truncations the buffers
* must be flushed in this order:
@@ -629,14 +687,9 @@
case BT_SUPERBLK:
case BT_CYLGRP:
default:
- prtbuf("ckfini: improper buffer type on cache list",bp);
+ prtbuf(bp,"ckfini: improper buffer type on cache list");
continue;
/* These are the ones to flush in this step */
- case BT_EMPTY:
- if (bp->b_bno >= 0)
- pfatal("Unused BT_EMPTY buffer for block %jd\n",
- (intmax_t)bp->b_bno);
- /* FALLTHROUGH */
case BT_LEVEL1:
case BT_LEVEL2:
case BT_LEVEL3:
@@ -648,11 +701,10 @@
case BT_INODES:
continue;
}
- if (debug && bp->b_refcnt != 0) {
- prtbuf("ckfini: clearing in-use buffer", bp);
- pfatal("ckfini: clearing in-use buffer\n");
- }
+ if (debug && bp->b_refcnt != 0)
+ prtbuf(bp, "ckfini: clearing in-use buffer");
TAILQ_REMOVE(&bufqueuehd, bp, b_list);
+ LIST_REMOVE(bp, b_hash);
cnt++;
flush(fswritefd, bp);
free(bp->b_un.b_buf);
@@ -666,11 +718,10 @@
icachebp = NULL;
}
TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
- if (debug && bp->b_refcnt != 0) {
- prtbuf("ckfini: clearing in-use buffer", bp);
- pfatal("ckfini: clearing in-use buffer\n");
- }
+ if (debug && bp->b_refcnt != 0)
+ prtbuf(bp, "ckfini: clearing in-use buffer");
TAILQ_REMOVE(&bufqueuehd, bp, b_list);
+ LIST_REMOVE(bp, b_hash);
cnt++;
flush(fswritefd, bp);
free(bp->b_un.b_buf);
@@ -1050,45 +1101,77 @@
* allocate a data block with the specified number of fragments
*/
ufs2_daddr_t
-allocblk(long frags)
+allocblk(long startcg, long frags,
+ ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
- int i, j, k, cg, baseblk;
- struct bufarea *cgbp;
- struct cg *cgp;
+ ufs2_daddr_t blkno, newblk;
+ if (sujrecovery && checkblkavail == std_checkblkavail) {
+ pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
+ return (0);
+ }
if (frags <= 0 || frags > sblock.fs_frag)
return (0);
- for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) {
- for (j = 0; j <= sblock.fs_frag - frags; j++) {
- if (testbmap(i + j))
- continue;
- for (k = 1; k < frags; k++)
- if (testbmap(i + j + k))
- break;
- if (k < frags) {
- j += k;
- continue;
- }
- cg = dtog(&sblock, i + j);
- cgbp = cglookup(cg);
- cgp = cgbp->b_un.b_cg;
- if (!check_cgmagic(cg, cgbp, 0)) {
- i = (cg + 1) * sblock.fs_fpg - sblock.fs_frag;
- continue;
- }
- baseblk = dtogd(&sblock, i + j);
- for (k = 0; k < frags; k++) {
- setbmap(i + j + k);
- clrbit(cg_blksfree(cgp), baseblk + k);
- }
- n_blks += frags;
- if (frags == sblock.fs_frag)
- cgp->cg_cs.cs_nbfree--;
- else
- cgp->cg_cs.cs_nffree -= frags;
- cgdirty(cgbp);
- return (i + j);
+ for (blkno = cgdata(&sblock, startcg);
+ blkno < maxfsblock - sblock.fs_frag;
+ blkno += sblock.fs_frag) {
+ if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
+ continue;
+ if (newblk > 0)
+ return (newblk);
+ if (newblk < 0)
+ blkno = -newblk;
+ }
+ for (blkno = cgdata(&sblock, 0);
+ blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
+ blkno += sblock.fs_frag) {
+ if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
+ continue;
+ if (newblk > 0)
+ return (newblk);
+ if (newblk < 0)
+ blkno = -newblk;
+ }
+ return (0);
+}
+
+ufs2_daddr_t
+std_checkblkavail(blkno, frags)
+ ufs2_daddr_t blkno;
+ long frags;
+{
+ struct bufarea *cgbp;
+ struct cg *cgp;
+ ufs2_daddr_t j, k, baseblk;
+ long cg;
+
+ for (j = 0; j <= sblock.fs_frag - frags; j++) {
+ if (testbmap(blkno + j))
+ continue;
+ for (k = 1; k < frags; k++)
+ if (testbmap(blkno + j + k))
+ break;
+ if (k < frags) {
+ j += k;
+ continue;
}
+ cg = dtog(&sblock, blkno + j);
+ cgbp = cglookup(cg);
+ cgp = cgbp->b_un.b_cg;
+ if (!check_cgmagic(cg, cgbp, 0))
+ return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
+ baseblk = dtogd(&sblock, blkno + j);
+ for (k = 0; k < frags; k++) {
+ setbmap(blkno + j + k);
+ clrbit(cg_blksfree(cgp), baseblk + k);
+ }
+ n_blks += frags;
+ if (frags == sblock.fs_frag)
+ cgp->cg_cs.cs_nbfree--;
+ else
+ cgp->cg_cs.cs_nffree -= frags;
+ cgdirty(cgbp);
+ return (blkno + j);
}
return (0);
}
@@ -1261,14 +1344,19 @@
/*
* Print details about a buffer.
*/
-static void
-prtbuf(const char *msg, struct bufarea *bp)
+void
+prtbuf(struct bufarea *bp, const char *fmt, ...)
{
-
- printf("%s: bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
- "index %jd\n", msg, bp, BT_BUFTYPE(bp->b_type),
- (intmax_t) bp->b_bno, bp->b_size, bp->b_refcnt,
- bp->b_flags & B_DIRTY ? "dirty" : "clean", (intmax_t) bp->b_index);
+ va_list ap;
+ va_start(ap, fmt);
+ if (preen)
+ (void)fprintf(stdout, "%s: ", cdevname);
+ (void)vfprintf(stdout, fmt, ap);
+ va_end(ap);
+ printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
+ "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
+ bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
+ (intmax_t) bp->b_index);
}
/*
diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c
--- a/sbin/fsck_ffs/inode.c
+++ b/sbin/fsck_ffs/inode.c
@@ -38,6 +38,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/stat.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
@@ -58,6 +59,9 @@
static int iblock(struct inodesc *, off_t isize, int type);
static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
struct bufarea **);
+static int snapclean(struct inodesc *idesc);
+static void chkcopyonwrite(struct fs *, ufs2_daddr_t,
+ ufs2_daddr_t (*checkblkavail)(long, long));
int
ckinode(union dinode *dp, struct inodesc *idesc)
@@ -378,8 +382,12 @@
int c;
if (cnt <= 0 || blk <= 0 || blk > maxfsblock ||
- cnt - 1 > maxfsblock - blk)
+ cnt - 1 > maxfsblock - blk) {
+ if (debug)
+ printf("out of range: blk %ld, offset %i, size %d\n",
+ (long)blk, (int)fragnum(&sblock, blk), cnt);
return (1);
+ }
if (cnt > sblock.fs_frag ||
fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
if (debug)
@@ -650,11 +658,21 @@
freeblock(struct inodesc *idesc)
{
struct dups *dlp;
+ struct bufarea *cgbp;
+ struct cg *cgp;
ufs2_daddr_t blkno;
- long nfrags, res;
+ long size, nfrags, res;
res = KEEPON;
blkno = idesc->id_blkno;
+ if (idesc->id_type == SNAP) {
+ pfatal("clearing a snapshot dinode\n");
+ return (STOP);
+ }
+ size = lfragtosize(&sblock, idesc->id_numfrags);
+ if (snapblkfree(&sblock, blkno, size, idesc->id_number,
+ std_checkblkavail))
+ return (res);
for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
if (chkrange(blkno, 1)) {
res = SKIP;
@@ -674,12 +692,407 @@
}
}
}
+ /*
+ * If all successfully returned, account for them.
+ */
+ if (nfrags == 0) {
+ cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
+ cgp = cgbp->b_un.b_cg;
+ if (idesc->id_numfrags == sblock.fs_frag)
+ cgp->cg_cs.cs_nbfree++;
+ else
+ cgp->cg_cs.cs_nffree += idesc->id_numfrags;
+ cgdirty(cgbp);
+ }
return (res);
}
+/*
+ * Prepare a snapshot file for being removed.
+ */
+void
+snapremove(ino_t inum)
+{
+ struct inodesc idesc;
+ struct inode ip;
+ int i;
+
+ for (i = 0; i < snapcnt; i++)
+ if (snaplist[i].i_number == inum)
+ break;
+ if (i == snapcnt)
+ ginode(inum, &ip);
+ else
+ ip = snaplist[i];
+ if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
+ printf("snapremove: inode %jd is not a snapshot\n",
+ (intmax_t)inum);
+ if (i == snapcnt)
+ irelse(&ip);
+ return;
+ }
+ if (debug)
+ printf("snapremove: remove %sactive snapshot %jd\n",
+ i == snapcnt ? "in" : "", (intmax_t)inum);
+ /*
+ * If on active snapshot list, remove it.
+ */
+ if (i < snapcnt) {
+ for (i++; i < FSMAXSNAP; i++) {
+ if (sblock.fs_snapinum[i] == 0)
+ break;
+ snaplist[i - 1] = snaplist[i];
+ sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
+ }
+ sblock.fs_snapinum[i - 1] = 0;
+ bzero(&snaplist[i - 1], sizeof(struct inode));
+ snapcnt--;
+ }
+ idesc.id_type = SNAP;
+ idesc.id_func = snapclean;
+ idesc.id_number = inum;
+ (void)ckinode(ip.i_dp, &idesc);
+ DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
+ inodirty(&ip);
+ irelse(&ip);
+}
+
+static int
+snapclean(struct inodesc *idesc)
+{
+ ufs2_daddr_t blkno;
+ struct bufarea *bp;
+ union dinode *dp;
+
+ blkno = idesc->id_blkno;
+ if (blkno == 0)
+ return (KEEPON);
+
+ bp = idesc->id_bp;
+ dp = idesc->id_dp;
+ if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
+ if (idesc->id_lbn < UFS_NDADDR)
+ DIP_SET(dp, di_db[idesc->id_lbn], 0);
+ else
+ IBLK_SET(bp, bp->b_index, 0);
+ dirty(bp);
+ }
+ return (KEEPON);
+}
+
+/*
+ * Notification that a block is being freed. Return zero if the free
+ * should be allowed to proceed. Return non-zero if the snapshot file
+ * wants to claim the block. The block will be claimed if it is an
+ * uncopied part of one of the snapshots. It will be freed if it is
+ * either a BLK_NOCOPY or has already been copied in all of the snapshots.
+ * If a fragment is being freed, then all snapshots that care about
+ * it must make a copy since a snapshot file can only claim full sized
+ * blocks. Note that if more than one snapshot file maps the block,
+ * we can pick one at random to claim it. Since none of the snapshots
+ * can change, we are assurred that they will all see the same unmodified
+ * image. When deleting a snapshot file (see ino_trunc above), we
+ * must push any of these claimed blocks to one of the other snapshots
+ * that maps it. These claimed blocks are easily identified as they will
+ * have a block number equal to their logical block number within the
+ * snapshot. A copied block can never have this property because they
+ * must always have been allocated from a BLK_NOCOPY location.
+ */
+int
+snapblkfree(fs, bno, size, inum, checkblkavail)
+ struct fs *fs;
+ ufs2_daddr_t bno;
+ long size;
+ ino_t inum;
+ ufs2_daddr_t (*checkblkavail)(long cg, long frags);
+{
+ union dinode *dp;
+ struct inode ip;
+ struct bufarea *snapbp;
+ ufs_lbn_t lbn;
+ ufs2_daddr_t blkno, relblkno;
+ int i, frags, claimedblk, copydone;
+
+ /* If no snapshots, nothing to do */
+ if (snapcnt == 0)
+ return (0);
+ if (debug)
+ printf("snapblkfree: in ino %ld free blkno %ld, size %ld\n",
+ inum, bno, size);
+ relblkno = blknum(fs, bno);
+ lbn = fragstoblks(fs, relblkno);
+ /* Direct blocks are always pre-copied */
+ if (lbn < UFS_NDADDR)
+ return (0);
+ copydone = 0;
+ claimedblk = 0;
+ for (i = 0; i < snapcnt; i++) {
+ /*
+ * Lookup block being freed.
+ */
+ ip = snaplist[i];
+ dp = ip.i_dp;
+ blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
+ lbn, &frags, &snapbp);
+ /*
+ * Check to see if block needs to be copied.
+ */
+ if (blkno == 0) {
+ /*
+ * A block that we map is being freed. If it has not
+ * been claimed yet, we will claim or copy it (below).
+ */
+ claimedblk = 1;
+ } else if (blkno == BLK_SNAP) {
+ /*
+ * No previous snapshot claimed the block,
+ * so it will be freed and become a BLK_NOCOPY
+ * (don't care) for us.
+ */
+ if (claimedblk)
+ pfatal("snapblkfree: inconsistent block type");
+ IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
+ dirty(snapbp);
+ brelse(snapbp);
+ continue;
+ } else /* BLK_NOCOPY or default */ {
+ /*
+ * If the snapshot has already copied the block
+ * (default), or does not care about the block,
+ * it is not needed.
+ */
+ brelse(snapbp);
+ continue;
+ }
+ /*
+ * If this is a full size block, we will just grab it
+ * and assign it to the snapshot inode. Otherwise we
+ * will proceed to copy it. See explanation for this
+ * routine as to why only a single snapshot needs to
+ * claim this block.
+ */
+ if (size == fs->fs_bsize) {
+ if (debug)
+ printf("Grabonremove snapshot %ju lbn %jd "
+ "from inum %ju\n", (intmax_t)ip.i_number,
+ (intmax_t)lbn, (uintmax_t)inum);
+ IBLK_SET(snapbp, snapbp->b_index, relblkno);
+ dirty(snapbp);
+ brelse(snapbp);
+ DIP_SET(dp, di_blocks,
+ DIP(dp, di_blocks) + btodb(size));
+ inodirty(&ip);
+ return (1);
+ }
+
+ /* First time through, read the contents of the old block. */
+ if (copydone == 0) {
+ copydone = 1;
+ if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
+ fs->fs_bsize) != 0) {
+ pfatal("Could not read snapshot %ju block "
+ "%jd\n", (intmax_t)ip.i_number,
+ (intmax_t)relblkno);
+ continue;
+ }
+ }
+ /*
+ * This allocation will never require any additional
+ * allocations for the snapshot inode.
+ */
+ blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
+ checkblkavail);
+ if (blkno == 0) {
+ pfatal("Could not allocate block for snapshot %ju\n",
+ (intmax_t)ip.i_number);
+ continue;
+ }
+ if (debug)
+ printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
+ "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
+ (intmax_t)lbn, (uintmax_t)inum, size,
+ (intmax_t)blkno);
+ blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
+ IBLK_SET(snapbp, snapbp->b_index, blkno);
+ dirty(snapbp);
+ brelse(snapbp);
+ DIP_SET(dp, di_blocks,
+ DIP(dp, di_blocks) + btodb(fs->fs_bsize));
+ inodirty(&ip);
+ }
+ return (0);
+}
+
+/*
+ * Notification that a block is being written. Return if the block
+ * is part of a snapshot as snapshots never track other snapshots.
+ * The block will be copied in all of the snapshots that are tracking
+ * it and have not yet copied it. Some buffers may hold more than one
+ * block. Here we need to check each block in the buffer.
+ */
+void
+copyonwrite(fs, bp, checkblkavail)
+ struct fs *fs;
+ struct bufarea *bp;
+ ufs2_daddr_t (*checkblkavail)(long cg, long frags);
+{
+ ufs2_daddr_t copyblkno;
+ long i, numblks;
+
+ /* If no snapshots, nothing to do. */
+ if (snapcnt == 0)
+ return;
+ numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
+ if (debug)
+ prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
+ numblks, numblks > 1 ? "s" : "");
+ copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
+ for (i = 0; i < numblks; i++) {
+ chkcopyonwrite(fs, copyblkno, checkblkavail);
+ copyblkno += fs->fs_frag;
+ }
+}
+
+static void
+chkcopyonwrite(fs, copyblkno, checkblkavail)
+ struct fs *fs;
+ ufs2_daddr_t copyblkno;
+ ufs2_daddr_t (*checkblkavail)(long cg, long frags);
+{
+ struct inode ip;
+ union dinode *dp;
+ struct bufarea *snapbp;
+ ufs2_daddr_t blkno;
+ int i, frags, copydone;
+ ufs_lbn_t lbn;
+
+ lbn = fragstoblks(fs, copyblkno);
+ /* Direct blocks are always pre-copied */
+ if (lbn < UFS_NDADDR)
+ return;
+ copydone = 0;
+ for (i = 0; i < snapcnt; i++) {
+ /*
+ * Lookup block being freed.
+ */
+ ip = snaplist[i];
+ dp = ip.i_dp;
+ blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
+ /*
+ * Check to see if block needs to be copied.
+ */
+ if (blkno != 0) {
+ /*
+ * A block that we have already copied or don't track.
+ */
+ brelse(snapbp);
+ continue;
+ }
+ /* First time through, read the contents of the old block. */
+ if (copydone == 0) {
+ copydone = 1;
+ if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
+ fs->fs_bsize) != 0) {
+ pfatal("Could not read snapshot %ju block "
+ "%jd\n", (intmax_t)ip.i_number,
+ (intmax_t)copyblkno);
+ continue;
+ }
+ }
+ /*
+ * This allocation will never require any additional
+ * allocations for the snapshot inode.
+ */
+ if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
+ checkblkavail)) == 0) {
+ pfatal("Could not allocate block for snapshot %ju\n",
+ (intmax_t)ip.i_number);
+ continue;
+ }
+ if (debug)
+ prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
+ "blkno %ju setting in buffer",
+ (intmax_t)ip.i_number, (intmax_t)lbn,
+ (intmax_t)blkno);
+ blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
+ IBLK_SET(snapbp, snapbp->b_index, blkno);
+ dirty(snapbp);
+ brelse(snapbp);
+ DIP_SET(dp, di_blocks,
+ DIP(dp, di_blocks) + btodb(fs->fs_bsize));
+ inodirty(&ip);
+ }
+ return;
+}
+
+/*
+ * Traverse an inode and check that its block count is correct
+ * fixing it if necessary.
+ */
+void
+check_blkcnt(struct inode *ip)
+{
+ struct inodesc idesc;
+ union dinode *dp;
+ ufs2_daddr_t ndb;
+ int j, ret, offset;
+
+ dp = ip->i_dp;
+ memset(&idesc, 0, sizeof(struct inodesc));
+ idesc.id_func = pass1check;
+ idesc.id_number = ip->i_number;
+ idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP;
+ (void)ckinode(dp, &idesc);
+ if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
+ ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
+ for (j = 0; j < UFS_NXADDR; j++) {
+ if (--ndb == 0 &&
+ (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0)
+ idesc.id_numfrags = numfrags(&sblock,
+ fragroundup(&sblock, offset));
+ else
+ idesc.id_numfrags = sblock.fs_frag;
+ if (dp->dp2.di_extb[j] == 0)
+ continue;
+ idesc.id_blkno = dp->dp2.di_extb[j];
+ ret = (*idesc.id_func)(&idesc);
+ if (ret & STOP)
+ break;
+ }
+ }
+ idesc.id_entryno *= btodb(sblock.fs_fsize);
+ if (DIP(dp, di_blocks) != idesc.id_entryno) {
+ if (!(sujrecovery && preen)) {
+ pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
+ (u_long)idesc.id_number,
+ (uintmax_t)DIP(dp, di_blocks),
+ (uintmax_t)idesc.id_entryno);
+ if (preen)
+ printf(" (CORRECTED)\n");
+ else if (reply("CORRECT") == 0)
+ return;
+ }
+ if (bkgrdflag == 0) {
+ DIP_SET(dp, di_blocks, idesc.id_entryno);
+ inodirty(ip);
+ } else {
+ cmd.value = idesc.id_number;
+ cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
+ if (debug)
+ printf("adjblkcnt ino %ju amount %lld\n",
+ (uintmax_t)cmd.value, (long long)cmd.size);
+ if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
+ &cmd, sizeof cmd) == -1)
+ rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
+ }
+ }
+}
+
void
freeinodebuf(void)
{
+ struct bufarea *bp;
+ int i;
/*
* Flush old contents in case they have been updated.
@@ -689,6 +1102,14 @@
free((char *)inobuf.b_un.b_buf);
inobuf.b_un.b_buf = NULL;
firstinum = lastinum = 0;
+ /*
+ * Reload the snapshot inodes in case any of them changed.
+ */
+ for (i = 0; i < snapcnt; i++) {
+ bp = snaplist[i].i_bp;
+ bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
+ bp->b_size);
+ }
}
/*
@@ -720,6 +1141,7 @@
inpp = &inphead[inumber % dirhash];
inp->i_nexthash = *inpp;
*inpp = inp;
+ inp->i_flags = 0;
inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
inp->i_dotdot = (ino_t)0;
inp->i_number = inumber;
@@ -803,6 +1225,10 @@
printf(" (CLEARED)\n");
n_files--;
if (bkgrdflag == 0) {
+ if (idesc->id_type == SNAP) {
+ snapremove(idesc->id_number);
+ idesc->id_type = ADDR;
+ }
(void)ckinode(dp, idesc);
inoinfo(idesc->id_number)->ino_state = USTATE;
clearinode(dp);
@@ -967,7 +1393,8 @@
cgdirty(cgbp);
ginode(ino, &ip);
dp = ip.i_dp;
- DIP_SET(dp, di_db[0], allocblk((long)1));
+ DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
+ std_checkblkavail));
if (DIP(dp, di_db[0]) == 0) {
inoinfo(ino)->ino_state = USTATE;
irelse(&ip);
diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c
--- a/sbin/fsck_ffs/main.c
+++ b/sbin/fsck_ffs/main.c
@@ -491,6 +491,7 @@
*/
if (preen == 0)
printf("** Phase 5 - Check Cyl groups\n");
+ snapflush(std_checkblkavail);
pass5();
IOstats("Pass5");
diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c
--- a/sbin/fsck_ffs/setup.c
+++ b/sbin/fsck_ffs/setup.c
@@ -59,6 +59,9 @@
#include "fsck.h"
struct inoinfo **inphead, **inpsort; /* info about all inodes */
+struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */
+int snapcnt; /* number of active snapshots */
+char *copybuf; /* buffer to copy snapshot blocks */
static int sbhashfailed;
#define POWEROF2(num) (((num) & ((num) - 1)) == 0)
@@ -66,6 +69,8 @@
static int calcsb(char *dev, int devfd, struct fs *fs);
static void saverecovery(int readfd, int writefd);
static int chkrecovery(int devfd);
+static int getlbnblkno(struct inodesc *);
+static int checksnapinfo(struct inode *);
/*
* Read in a superblock finding an alternate if necessary.
@@ -75,7 +80,8 @@
int
setup(char *dev)
{
- long bmapsize;
+ long i, bmapsize;
+ struct inode ip;
/*
* We are expected to have an open file descriptor and a superblock.
@@ -174,6 +180,42 @@
usedsoftdep = 1;
else
usedsoftdep = 0;
+ /*
+ * Collect any snapshot inodes so that we can allow them to
+ * claim any blocks that we free. The code for doing this is
+ * imported here and into inode.c from sys/ufs/ffs/ffs_snapshot.c.
+ */
+ for (snapcnt = 0; snapcnt < FSMAXSNAP; snapcnt++) {
+ if (sblock.fs_snapinum[snapcnt] == 0)
+ break;
+ ginode(sblock.fs_snapinum[snapcnt], &ip);
+ if ((DIP(ip.i_dp, di_mode) & IFMT) == IFREG &&
+ (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) != 0 &&
+ checksnapinfo(&ip)) {
+ if (debug)
+ printf("Load snapshot %jd\n",
+ (intmax_t)sblock.fs_snapinum[snapcnt]);
+ snaplist[snapcnt] = ip;
+ continue;
+ }
+ printf("Removing non-snapshot inode %ju from snapshot list\n",
+ (uintmax_t)sblock.fs_snapinum[snapcnt]);
+ irelse(&ip);
+ for (i = snapcnt + 1; i < FSMAXSNAP; i++) {
+ if (sblock.fs_snapinum[i] == 0)
+ break;
+ sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
+ }
+ sblock.fs_snapinum[i - 1] = 0;
+ snapcnt--;
+ sbdirty();
+ }
+ if (snapcnt > 0 && copybuf == NULL) {
+ copybuf = Malloc(sblock.fs_bsize);
+ if (copybuf == NULL)
+ errx(EEXIT, "cannot allocate space for snapshot "
+ "copy buffer");
+ }
return (1);
badsb:
@@ -181,6 +223,144 @@
return (0);
}
+/*
+ * Check for valid snapshot information.
+ *
+ * Each snapshot has a list of blocks that have been copied. This list
+ * is consulted before checking the snapshot inode. Its purpose is to
+ * speed checking of commonly checked blocks and to avoid recursive
+ * checks of the snapshot inode. In particular, the list must contain
+ * the superblock, the superblock summary information, and all the
+ * cylinder group blocks. The list may contain other commonly checked
+ * pointers such as those of the blocks that contain the snapshot inodes.
+ * The list is sorted into block order to allow binary search lookup.
+ *
+ * The twelve direct direct block pointers of the snapshot are always
+ * copied, so we test for them first before checking the list itself
+ * (i.e., they are not in the list).
+ *
+ * The checksnapinfo() routine needs to ensure that the list contains at
+ * least the super block, its summary information, and the cylinder groups.
+ * Here we check the list first for the superblock, zero or more cylinder
+ * groups up to the location of the superblock summary information, the
+ * summary group information, and any remaining cylinder group maps that
+ * follow it. We skip over any other entries in the list.
+ */
+#define CHKBLKINLIST(chkblk) \
+ /* All UFS_NDADDR blocks are copied */ \
+ if ((chkblk) >= UFS_NDADDR) { \
+ /* Skip over blocks that are not of interest */ \
+ while (*blkp < (chkblk) && blkp < lastblkp) \
+ blkp++; \
+ /* Fail if end of list and not all blocks found */ \
+ if (blkp >= lastblkp) { \
+ pwarn("UFS%d snapshot inode %jd failed: " \
+ "improper block list length (%jd)\n", \
+ sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, \
+ (intmax_t)snapip->i_number, \
+ (intmax_t)(lastblkp - &snapblklist[0])); \
+ status = 0; \
+ } \
+ /* Fail if block we seek is missing */ \
+ else if (*blkp++ != (chkblk)) { \
+ pwarn("UFS%d snapshot inode %jd failed: " \
+ "block list (%jd) != %s (%jd)\n", \
+ sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, \
+ (intmax_t)snapip->i_number, \
+ (intmax_t)blkp[-1], #chkblk, \
+ (intmax_t)chkblk); \
+ status = 0; \
+ } \
+ }
+
+static int
+checksnapinfo(struct inode *snapip)
+{
+ struct fs *fs;
+ struct bufarea *bp;
+ struct inodesc idesc;
+ daddr_t *snapblklist, *blkp, *lastblkp, csblkno;
+ int cg, loc, len, status;
+ ufs_lbn_t lbn;
+ size_t size;
+
+ fs = &sblock;
+ memset(&idesc, 0, sizeof(struct inodesc));
+ idesc.id_type = ADDR;
+ idesc.id_func = getlbnblkno;
+ idesc.id_number = snapip->i_number;
+ lbn = howmany(fs->fs_size, fs->fs_frag);
+ idesc.id_parent = lbn; /* sought after blkno */
+ if ((ckinode(snapip->i_dp, &idesc) & FOUND) == 0)
+ return (0);
+ size = fragroundup(fs,
+ DIP(snapip->i_dp, di_size) - lblktosize(fs, lbn));
+ bp = getdatablk(idesc.id_parent, size, BT_DATA);
+ snapblklist = (daddr_t *)bp->b_un.b_buf;
+ /*
+ * snapblklist[0] is the size of the list
+ * snapblklist[1] is the first element of the list
+ *
+ * We need to be careful to bound the size of the list and verify
+ * that we have not run off the end of it if it or its size has
+ * been corrupted.
+ */
+ blkp = &snapblklist[1];
+ lastblkp = &snapblklist[MAX(0,
+ MIN(snapblklist[0] + 1, size / sizeof(daddr_t)))];
+ status = 1;
+ /* Check that the superblock is listed. */
+ CHKBLKINLIST(lblkno(fs, fs->fs_sblockloc));
+ if (status == 0)
+ goto out;
+ /*
+ * Calculate where the summary information is located.
+ * Usually it is in the first cylinder group, but growfs
+ * may move it to the first cylinder group that it adds.
+ *
+ * Check all cylinder groups up to the summary information.
+ */
+ csblkno = fragstoblks(fs, fs->fs_csaddr);
+ for (cg = 0; cg < fs->fs_ncg; cg++) {
+ if (fragstoblks(fs, cgtod(fs, cg)) > csblkno)
+ break;
+ CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg)));
+ if (status == 0)
+ goto out;
+ }
+ /* Check the summary information block(s). */
+ len = howmany(fs->fs_cssize, fs->fs_bsize);
+ for (loc = 0; loc < len; loc++) {
+ CHKBLKINLIST(csblkno + loc);
+ if (status == 0)
+ goto out;
+ }
+ /* Check the remaining cylinder groups. */
+ for (; cg < fs->fs_ncg; cg++) {
+ CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg)));
+ if (status == 0)
+ goto out;
+ }
+out:
+ brelse(bp);
+ return (status);
+}
+
+/*
+ * Return the block number associated with a specified inode lbn.
+ * Requested lbn is in id_parent. If found, block is returned in
+ * id_parent.
+ */
+static int
+getlbnblkno(struct inodesc *idesc)
+{
+
+ if (idesc->id_lbn < idesc->id_parent)
+ return (KEEPON);
+ idesc->id_parent = idesc->id_blkno;
+ return (STOP | FOUND);
+}
+
/*
* Open a device or file to be checked by fsck.
*/
diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c
--- a/sbin/fsck_ffs/suj.c
+++ b/sbin/fsck_ffs/suj.c
@@ -321,7 +321,7 @@
* To be certain we're not freeing a reallocated block we lookup
* this block in the blk hash and see if there is an allocation
* journal record that overlaps with any fragments in the block
- * we're concerned with. If any fragments have ben reallocated
+ * we're concerned with. If any fragments have been reallocated
* the block has already been freed and re-used for another purpose.
*/
mask = 0;
@@ -378,6 +378,50 @@
return (0);
}
+/*
+ * Check to see if the requested block is available.
+ * We can just check in the cylinder-group maps as
+ * they will only have usable blocks in them.
+ */
+ufs2_daddr_t
+suj_checkblkavail(blkno, frags)
+ ufs2_daddr_t blkno;
+ long frags;
+{
+ struct bufarea *cgbp;
+ struct cg *cgp;
+ ufs2_daddr_t j, k, baseblk;
+ long cg;
+
+ cg = dtog(&sblock, blkno);
+ cgbp = cglookup(cg);
+ cgp = cgbp->b_un.b_cg;
+ if (!check_cgmagic(cg, cgbp, 0))
+ return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
+ baseblk = dtogd(&sblock, blkno);
+ for (j = 0; j <= sblock.fs_frag - frags; j++) {
+ if (!isset(cg_blksfree(cgp), baseblk + j))
+ continue;
+ for (k = 1; k < frags; k++)
+ if (!isset(cg_blksfree(cgp), baseblk + j + k))
+ break;
+ if (k < frags) {
+ j += k;
+ continue;
+ }
+ for (k = 0; k < frags; k++)
+ clrbit(cg_blksfree(cgp), baseblk + j + k);
+ n_blks += frags;
+ if (frags == sblock.fs_frag)
+ cgp->cg_cs.cs_nbfree--;
+ else
+ cgp->cg_cs.cs_nffree -= frags;
+ cgdirty(cgbp);
+ return ((cg * sblock.fs_fpg) + baseblk + j);
+ }
+ return (0);
+}
+
/*
* Clear an inode from the cg bitmap. If the inode was already clear return
* 0 so the caller knows it does not have to check the inode contents.
@@ -420,7 +464,7 @@
* set in the mask.
*/
static void
-blk_free(ufs2_daddr_t bno, int mask, int frags)
+blk_free(ino_t ino, ufs2_daddr_t bno, int mask, int frags)
{
ufs1_daddr_t fragno, cgbno;
struct suj_cg *sc;
@@ -431,6 +475,13 @@
if (debug)
printf("Freeing %d frags at blk %jd mask 0x%x\n",
frags, bno, mask);
+ /*
+ * Check to see if the block needs to be claimed by a snapshot.
+ * If wanted, the snapshot references it. Otherwise we free it.
+ */
+ if (snapblkfree(fs, bno, lfragtosize(fs, frags), ino,
+ suj_checkblkavail))
+ return;
cg = dtog(fs, bno);
sc = cg_lookup(cg);
cgp = sc->sc_cgp;
@@ -846,7 +897,7 @@
blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
{
- blk_free(blk, blk_freemask(blk, ino, lbn, frags), frags);
+ blk_free(ino, blk, blk_freemask(blk, ino, lbn, frags), frags);
}
/*
@@ -865,7 +916,7 @@
if (lbn <= -UFS_NDADDR && follow && mask == 0)
indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR);
else
- blk_free(blk, mask, frags);
+ blk_free(ino, blk, mask, frags);
}
static void
@@ -997,6 +1048,8 @@
if ((DIP(dp, di_mode) & IFMT) == IFDIR)
ino_visit(dp, ino, ino_free_children, 0);
DIP_SET(dp, di_nlink, 0);
+ if ((DIP(dp, di_flags) & SF_SNAPSHOT) != 0)
+ snapremove(ino);
ino_visit(dp, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR);
/* Here we have to clear the inode and release any blocks it holds. */
gen = DIP(dp, di_gen);
@@ -1209,7 +1262,7 @@
continue;
}
isdirty = 1;
- blk_free(nblk, 0, fs->fs_frag);
+ blk_free(ino, nblk, 0, fs->fs_frag);
IBLK_SET(bp, i, 0);
}
if (isdirty)
@@ -1245,6 +1298,11 @@
dp = ip.i_dp;
mode = DIP(dp, di_mode) & IFMT;
cursize = DIP(dp, di_size);
+ /* If no size change, nothing to do */
+ if (size == cursize) {
+ irelse(&ip);
+ return;
+ }
if (debug)
printf("Truncating ino %ju, mode %o to size %jd from size %jd\n",
(uintmax_t)ino, mode, size, cursize);
@@ -1264,13 +1322,14 @@
if (size > 0)
err_suj("Partial truncation of ino %ju snapshot file\n",
(uintmax_t)ino);
+ snapremove(ino);
}
lastlbn = lblkno(fs, blkroundup(fs, size));
for (i = lastlbn; i < UFS_NDADDR; i++) {
if ((bn = DIP(dp, di_db[i])) == 0)
continue;
blksize = sblksize(fs, cursize, i);
- blk_free(bn, 0, numfrags(fs, blksize));
+ blk_free(ino, bn, 0, numfrags(fs, blksize));
DIP_SET(dp, di_db[i], 0);
}
/*
@@ -1283,13 +1342,13 @@
/* If we're not freeing any in this indirect range skip it. */
if (lastlbn >= nextlbn)
continue;
- if (DIP(dp, di_ib[i]) == 0)
- continue;
- indir_trunc(ino, -lbn - i, DIP(dp, di_ib[i]), lastlbn, dp);
- /* If we freed everything in this indirect free the indir. */
- if (lastlbn > lbn)
- continue;
- blk_free(DIP(dp, di_ib[i]), 0, fs->fs_frag);
+ if ((bn = DIP(dp, di_ib[i])) == 0)
+ continue;
+ indir_trunc(ino, -lbn - i, bn, lastlbn, dp);
+ /* If we freed everything in this indirect free the indir. */
+ if (lastlbn > lbn)
+ continue;
+ blk_free(ino, bn, 0, fs->fs_frag);
DIP_SET(dp, di_ib[i], 0);
}
/*
@@ -1319,7 +1378,7 @@
if (oldspace != newspace) {
bn += numfrags(fs, newspace);
frags = numfrags(fs, oldspace - newspace);
- blk_free(bn, 0, frags);
+ blk_free(ino, bn, 0, frags);
totalfrags -= frags;
}
}
@@ -1468,7 +1527,7 @@
mask >>= frags;
blk += frags;
frags = brec->jb_frags - frags;
- blk_free(blk, mask, frags);
+ blk_free(brec->jb_ino, blk, mask, frags);
continue;
}
/*
@@ -2406,6 +2465,13 @@
}
if (preen == 0 && (jrecs > 0 || jbytes > 0) && reply("WRITE CHANGES") == 0)
return (0);
+ /*
+ * Check block counts of snapshot inodes and
+ * make copies of any needed snapshot blocks.
+ */
+ for (i = 0; i < snapcnt; i++)
+ check_blkcnt(&snaplist[i]);
+ snapflush(suj_checkblkavail);
/*
* Recompute the fs summary info from correct cs summaries.
*/
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Nov 18, 12:16 PM (20 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14695846
Default Alt Text
D36491.id112753.diff (44 KB)
Attached To
Mode
D36491: Add support for managing UFS/FFS snapshots to fsck_ffs(8)
Attached
Detach File
Event Timeline
Log In to Comment