From: Dave Chinner <dchinner@redhat.com> Date: Tue, 20 Jul 2010 01:14:01 -0400 Subject: [fs] xfs: validate untrusted inode numbers during lookup Message-id: <1279588442-3741-3-git-send-email-dchinner@redhat.com> Patchwork-id: 26955 O-Subject: [RHEL5.6 2/3] xfs: validate untrusted inode numbers during lookup Bugzilla: 607032 RH-Acked-by: Christoph Hellwig <chellwig@redhat.com> Upstream Commit: 7124fe0a5b619d65b739477b3b55a20bf805b06d RH BZ: 607032 When we decode a handle or do a bulkstat lookup, we are using an inode number we cannot trust to be valid. If we are deleting inode chunks from disk (default noikeep mode), then we cannot trust the on disk inode buffer for any given inode number to correctly reflect whether the inode has been unlinked as the di_mode nor the generation number may have been updated on disk. This is due to the fact that when we delete an inode chunk, we do not write the clusters back to disk when they are removed - instead we mark them stale to avoid them being written back potentially over the top of something that has been subsequently allocated at that location. The result is that we can have locations of disk that look like they contain valid inodes but in reality do not. Hence we cannot simply convert the inode number to a block number and read the location from disk to determine if the inode is valid or not. As a result, and XFS_IGET_BULKSTAT lookup needs to actually look the inode up in the inode allocation btree to determine if the inode number is valid or not. It should be noted even on ikeep filesystems, there is the possibility that blocks on disk may look like valid inode clusters. e.g. if there are filesystem images hosted on the filesystem. Hence even for ikeep filesystems we really need to validate that the inode number is valid before issuing the inode buffer read. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index aad8c5d..541e3c1 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1157,6 +1157,69 @@ error0: return error; } +STATIC int +xfs_imap_lookup( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agino_t agino, + xfs_agblock_t agbno, + xfs_agblock_t *chunk_agbno, + xfs_agblock_t *offset_agbno, + int flags) +{ + xfs_agino_t chunk_agino; /* first agino in inode chunk */ + __int32_t chunk_cnt; /* count of free inodes in chunk */ + xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + xfs_agino_t startino; + int error; + int i; + + down_read(&mp->m_peraglock); + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + up_read(&mp->m_peraglock); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " + "xfs_ialloc_read_agi() returned " + "error %d, agno %d", + error, agno); + return error; + } + + /* + * derive and lookup the exact inode record for the given agino. If the + * record cannot be found, then it's an invalid inode number and we + * should abort. + */ + cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, + (xfs_inode_t *)0, 0); + startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); + error = xfs_inobt_lookup_eq(cur, startino, 0, 0, &i); + if (!error) { + if (i) + error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, + &chunk_free, &i); + if (!error && i == 0) + error = EINVAL; + } + + xfs_trans_brelse(tp, agbp); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + if (error) + return error; + + /* for untrusted inodes check it is allocated first */ + if ((flags & XFS_IGET_BULKSTAT) && + (chunk_free & XFS_INOBT_MASK(agino - chunk_agino))) + return EINVAL; + + *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); + *offset_agbno = agbno - *chunk_agbno; + return 0; +} + /* * Return the location of the inode in bno/off, for mapping it into a buffer. */ @@ -1172,18 +1235,12 @@ xfs_dilocate( uint flags) /* flags concerning inode lookup */ { xfs_agblock_t agbno; /* block number of inode in the alloc group */ - xfs_buf_t *agbp; /* agi buffer */ xfs_agino_t agino; /* inode number within alloc group */ xfs_agnumber_t agno; /* allocation group number */ int blks_per_cluster; /* num blocks per inode cluster */ xfs_agblock_t chunk_agbno; /* first block in inode chunk */ - xfs_agino_t chunk_agino; /* first agino in inode chunk */ - __int32_t chunk_cnt; /* count of free inodes in chunk */ - xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ xfs_agblock_t cluster_agbno; /* first block in inode cluster */ - xfs_btree_cur_t *cur; /* inode btree cursor */ int error; /* error code */ - int i; /* temp state */ int offset; /* index of inode in its buffer */ int offset_agbno; /* blks from chunk start to inode */ @@ -1224,6 +1281,24 @@ xfs_dilocate( #endif /* DEBUG */ return XFS_ERROR(EINVAL); } + + blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; + + /* + * For bulkstat and handle lookups, we have an untrusted inode number + * that we have to verify is valid. We cannot do this just by reading + * the inode buffer as it may have been unlinked and removed leaving + * inodes in stale state on disk. Hence we have to do a btree lookup + * in all cases where an untrusted inode number is passed. + */ + if (flags & XFS_IGET_BULKSTAT) { + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); + if (error) + return error; + goto out_map; + } + if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) || !(flags & XFS_IMAP_LOOKUP)) { offset = XFS_INO_TO_OFFSET(mp, ino); @@ -1233,7 +1308,7 @@ xfs_dilocate( *len = 1; return 0; } - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; + if (*bno != NULLFSBLOCK) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); @@ -1243,53 +1318,17 @@ xfs_dilocate( *len = blks_per_cluster; return 0; } + if (mp->m_inoalign_mask) { offset_agbno = agbno & mp->m_inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { - down_read(&mp->m_peraglock); - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); - if (error) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " - "xfs_ialloc_read_agi() returned " - "error %d, agno %d", - error, agno); -#endif /* DEBUG */ - return error; - } - cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, - (xfs_inode_t *)0, 0); - if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " - "xfs_inobt_lookup_le() failed"); -#endif /* DEBUG */ - goto error0; - } - if ((error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, - &chunk_free, &i))) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " - "xfs_inobt_get_rec() failed"); -#endif /* DEBUG */ - goto error0; - } - if (i == 0) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " - "xfs_inobt_get_rec() failed"); -#endif /* DEBUG */ - error = XFS_ERROR(EINVAL); - } - xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); if (error) return error; - chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); - offset_agbno = agbno - chunk_agbno; } +out_map: ASSERT(agbno >= chunk_agbno); cluster_agbno = chunk_agbno + ((offset_agbno / blks_per_cluster) * blks_per_cluster); @@ -1299,10 +1338,6 @@ xfs_dilocate( *off = offset; *len = blks_per_cluster; return 0; -error0: - xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; } /*