// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 */
/*
 * struct metapath - a path through an inode's metadata tree
 *
 * mp_list doesn't need to be that large: the maximum number of 64-bit
 * pointers in a 4k block is 512, so __u16 is fine for that.  It saves
 * stack space to keep it small.
 */
struct metapath {
	/* One buffer per height of the path; mp_bh[0] is the dinode buffer. */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	/* Index of the block pointer followed at each height. */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};
/** * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio * @ip: the inode * @dibh: the dinode buffer * @block: the block number that was allocated * @folio: The folio. * * Returns: errno
*/ staticint gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh,
u64 block, struct folio *folio)
{ struct inode *inode = &ip->i_inode;
/** * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big * @ip: The GFS2 inode to unstuff * * This routine unstuffs a dinode and returns it to a "normal" state such * that the height can be grown in the traditional way. * * Returns: errno
*/
int gfs2_unstuff_dinode(struct gfs2_inode *ip)
{ struct inode *inode = &ip->i_inode; struct folio *folio; int error;
/** * find_metapath - Find path through the metadata tree * @sdp: The superblock * @block: The disk block to look up * @mp: The metapath to return the result in * @height: The pre-calculated height of the metadata tree * * This routine returns a struct metapath structure that defines a path * through the metadata of inode "ip" to get to block "block". * * Example: * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a * filesystem with a blocksize of 4096. * * find_metapath() would return a struct metapath structure set to: * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165. * * That means that in order to get to the block containing the byte at * offset 101342453, we would load the indirect block pointed to by pointer * 0 in the dinode. We would then load the indirect block pointed to by * pointer 48 in that indirect block. We would then load the data block * pointed to by pointer 165 in that indirect block. * * ---------------------------------------- * | Dinode | | * | | 4| * | |0 1 2 3 4 5 9| * | | 6| * ---------------------------------------- * | * | * V * ---------------------------------------- * | Indirect Block | * | 5| * | 4 4 4 4 4 5 5 1| * |0 5 6 7 8 9 0 1 2| * ---------------------------------------- * | * | * V * ---------------------------------------- * | Indirect Block | * | 1 1 1 1 1 5| * | 6 6 6 6 6 1| * |0 3 4 5 6 7 2| * ---------------------------------------- * | * | * V * ---------------------------------------- * | Data block containing offset | * | 101342453 | * | | * | | * ---------------------------------------- *
*/
/** * metaptr1 - Return the first possible metadata pointer in a metapath buffer * @height: The metadata height (0 = dinode) * @mp: The metapath
*/ staticinline __be64 *metaptr1(unsignedint height, conststruct metapath *mp)
{ struct buffer_head *bh = mp->mp_bh[height]; if (height == 0) return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode))); return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}
/** * metapointer - Return pointer to start of metadata in a buffer * @height: The metadata height (0 = dinode) * @mp: The metapath * * Return a pointer to the block number of the next height of the metadata * tree given a buffer containing the pointer to the current height of the * metadata tree.
*/
staticint __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, unsignedint x, unsignedint h)
{ for (; x < h; x++) {
__be64 *ptr = metapointer(x, mp);
u64 dblock = be64_to_cpu(*ptr); int ret;
if (!dblock) break;
ret = gfs2_meta_buffer(ip, GFS2_METATYPE_IN, dblock, &mp->mp_bh[x + 1]); if (ret) return ret;
}
mp->mp_aheight = x + 1; return 0;
}
/** * lookup_metapath - Walk the metadata tree to a specific point * @ip: The inode * @mp: The metapath * * Assumes that the inode's buffer has already been looked up and * hooked onto mp->mp_bh[0] and that the metapath has been initialised * by find_metapath(). * * If this function encounters part of the tree which has not been * allocated, it returns the current height of the tree at the point * at which it found the unallocated block. Blocks which are found are * added to the mp->mp_bh[] list. * * Returns: error
*/
/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights.
 * The lookup starts at the highest height below @h that already has a
 * buffer in @mp (or at height 0 if there is none).
 *
 * Returns: error or the number of buffers filled
 */
static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	/* mp_aheight - 1 is the last height filled; x was already present. */
	return mp->mp_aheight - x - 1;
}
/*
 * release_metapath - drop the buffer references held by a metapath
 * @mp: The metapath
 *
 * Releases the buffers from height 0 upwards and clears each slot.  The
 * loop stops at the first NULL slot, so buffers stored above a NULL entry
 * are not released.
 */
static void release_metapath(struct metapath *mp)
{
	int i;

	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
		if (mp->mp_bh[i] == NULL)
			break;
		brelse(mp->mp_bh[i]);
		mp->mp_bh[i] = NULL;
	}
}
/** * gfs2_extent_length - Returns length of an extent of blocks * @bh: The metadata block * @ptr: Current position in @bh * @eob: Set to 1 if we hit "end of block" * * Returns: The length of the extent (minimum of one block)
*/
/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);
/* * gfs2_walk_metadata - walk a tree of indirect blocks * @inode: The inode * @mp: Starting point of walk * @max_len: Maximum number of blocks to walk * @walker: Called during the walk * * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or * past the end of metadata, and a negative error code otherwise.
*/
/* * The walk starts in the lowest allocated indirect block, which may be * before the position indicated by @mp. Adjust @max_len accordingly * to avoid a short walk.
*/ for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
max_len += mp->mp_list[hgt] * factor;
mp->mp_list[hgt] = 0;
factor *= sdp->sd_inptrs;
}
for (ptr = start; ptr < end; ptr++) { if (*ptr) {
mp->mp_list[hgt] += ptr - start; if (mp->mp_aheight == mp->mp_fheight) return WALK_STOP; return WALK_FOLLOW;
}
} return WALK_CONTINUE;
}
/** * gfs2_hole_size - figure out the size of a hole * @inode: The inode * @lblock: The logical starting block number * @len: How far to look (in blocks) * @mp: The metapath at lblock * @iomap: The iomap to store the hole size in * * This function modifies @mp. * * Returns: errno on error
*/ staticint gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len, struct metapath *mp, struct iomap *iomap)
{ struct metapath clone;
u64 hole_size; int ret;
clone_metapath(&clone, mp);
ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker); if (ret < 0) goto out;
/** * __gfs2_iomap_alloc - Build a metadata tree of the requested height * @inode: The GFS2 inode * @iomap: The iomap structure * @mp: The metapath, with proper height information calculated * * In this routine we may have to alloc: * i) Indirect blocks to grow the metadata tree height * ii) Indirect blocks to fill in lower part of the metadata tree * iii) Data blocks * * This function is called after __gfs2_iomap_get, which works out the * total number of blocks which we need via gfs2_alloc_size. * * We then do the actual allocation asking for an extent at a time (if * enough contiguous free blocks are available, there will only be one * allocation request per call) and uses the state machine to initialise * the blocks in order. * * Right now, this function will allocate at most one indirect block * worth of data -- with a default block size of 4K, that's slightly * less than 2M. If this limitation is ever removed to allow huge * allocations, we would probably still want to limit the iomap size we * return to avoid stalling other tasks during huge writes; the next * iomap iteration would then find the blocks already allocated. * * Returns: errno on error
*/
/** * gfs2_alloc_size - Compute the maximum allocation size * @inode: The inode * @mp: The metapath * @size: Requested size in blocks * * Compute the maximum size of the next allocation at @mp. * * Returns: size in blocks
*/ static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{ struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); const __be64 *first, *ptr, *end;
/* * For writes to stuffed files, this function is called twice via * __gfs2_iomap_get, before and after unstuffing. The size we return the * first time needs to be large enough to get the reservation and * allocation sizes right. The size we return the second time must * be exact or else __gfs2_iomap_alloc won't do the right thing.
*/
ret = gfs2_trans_begin(sdp, rblocks,
iomap->length >> inode->i_blkbits); if (ret) goto out_trans_fail;
if (unstuff) {
ret = gfs2_unstuff_dinode(ip); if (ret) goto out_trans_end;
release_metapath(mp);
ret = __gfs2_iomap_get(inode, iomap->offset,
iomap->length, flags, iomap, mp); if (ret) goto out_trans_end;
}
if (iomap->type == IOMAP_HOLE) {
ret = __gfs2_iomap_alloc(inode, iomap, mp); if (ret) {
gfs2_trans_end(sdp);
gfs2_inplace_release(ip);
punch_hole(ip, iomap->offset, iomap->length); goto out_qunlock;
}
}
tr = current->journal_info; if (tr->tr_num_buf_new)
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
if (gfs2_is_jdata(ip))
iomap->flags |= IOMAP_F_BUFFER_HEAD;
trace_gfs2_iomap_start(ip, pos, length, flags);
ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); if (ret) goto out_unlock;
switch(flags & (IOMAP_WRITE | IOMAP_ZERO)) { case IOMAP_WRITE: if (flags & IOMAP_DIRECT) { /* * Silently fall back to buffered I/O for stuffed files * or if we've got a hole (see gfs2_file_direct_write).
*/ if (iomap->type != IOMAP_MAPPED)
ret = -ENOTBLK; goto out_unlock;
} break; case IOMAP_ZERO: if (iomap->type == IOMAP_HOLE) goto out_unlock; break; default: goto out_unlock;
}
ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's ok to alloc blocks to satisfy the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */
ret = gfs2_iomap_alloc(inode, lblock << blkbits, *extlen << blkbits,
&iomap); if (ret) return ret; if (iomap.type != IOMAP_MAPPED) return -EIO;
*dblock = iomap.addr >> blkbits;
len = iomap.length >> blkbits; if (len < *extlen)
*extlen = len;
*new = iomap.flags & IOMAP_F_NEW; return 0;
}
/*
 * gfs2_block_zero_range - zero a byte range of a file via iomap
 * @inode: The inode
 * @from: Start of the range, in bytes
 * @length: Length of the range, in bytes
 *
 * The range is clamped to the current inode size; nothing is done when
 * @from lies at or beyond it.
 *
 * NOTE: Never call gfs2_block_zero_range with an open transaction because it
 * uses iomap write to perform its actions, which begin their own transactions
 * (iomap_begin, get_folio, etc.)
 *
 * Returns: 0 if there is nothing to zero, otherwise the result of
 * iomap_zero_range()
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from, loff_t length)
{
	BUG_ON(current->journal_info);
	if (from >= inode->i_size)
		return 0;
	length = min(length, inode->i_size - from);
	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops,
				&gfs2_iomap_write_ops, NULL);
}
#define GFS2_JTRUNC_REVOKES 8192
/** * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files * @inode: The inode being truncated * @oldsize: The original (larger) size * @newsize: The new smaller size * * With jdata files, we have to journal a revoke for each block which is * truncated. As a result, we need to split this into separate transactions * if the number of pages being truncated gets too large.
*/
ret = __gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp); if (!ret && iomap->type == IOMAP_HOLE)
ret = __gfs2_iomap_alloc(inode, iomap, &mp);
release_metapath(&mp); return ret;
}
/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	/*
	 * Start of a pass over the buffer.  If the caller (or a previous
	 * call) left an rgrp glock held in @rd_gh, keep working on that
	 * rgrp; otherwise the first block found selects the rgrp.  The
	 * per-pass extent and counter state is reset on every pass.
	 */
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 LM_FLAG_NODE_SCOPE, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		/* Extend the current extent while blocks stay contiguous. */
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	/* Free the last extent collected by the loop, if any. */
	if (bstart) {
		__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) {
		/* If buffer still has non-zero blocks outside the rgrp we
		   just processed, do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			inode_set_mtime_to_ts(&ip->i_inode,
					      inode_set_ctime_current(&ip->i_inode));
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
			buf_in_tr = false;
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}
/*
 * mp_eq_to_hgt - compare a metapath against a pointer list up to a height
 * @mp: The metapath
 * @list: The list of pointer indexes to compare with
 * @h: Number of leading entries (heights 0 .. @h - 1) to compare
 *
 * Returns: true if the first @h entries of mp->mp_list equal those of @list
 * (trivially true for @h == 0), false otherwise
 */
static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{
	if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
		return false;
	return true;
}
/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @sdp: The superblock
 * @mp: starting metapath
 * @h: desired height to search
 * @end_list: See punch_hole().
 * @end_aligned: See punch_hole().
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 *
 * The search starts at mp->mp_list[@h] and runs to the end of the buffer.
 * When @end_list is given and the metapath matches it at all heights below
 * @h, the search instead stops at end_list[@h], including that pointer only
 * if @h < @end_aligned.  On success, mp->mp_list[@h] is set to the index of
 * the pointer found and the entry for the next height (if any) is reset to 0.
 *
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;

		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}
/* States of the deallocation loop in punch_hole(). */
enum dealloc_states {
	DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,  /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,       /* process complete; the loop terminates */
};
if (end_list) { bool keep_end = height < end_aligned; if (!mp_eq_to_hgt(mp, end_list, height)) returnfalse;
end = end_list[height] + keep_end;
} else
end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs; return mp->mp_list[height] >= end;
}
/** * punch_hole - deallocate blocks in a file * @ip: inode to truncate * @offset: the start of the hole * @length: the size of the hole (or 0 for truncate) * * Punch a hole into a file or truncate a file at a given position. This * function operates in whole blocks (@offset and @length are rounded * accordingly); partially filled blocks must be cleared otherwise. * * This function works from the bottom up, and from the right to the left. In * other words, it strips off the highest layer (data) before stripping any of * the metadata. Doing it this way is best in case the operation is interrupted * by power failure, etc. The dinode is rewritten in every transaction to * guarantee integrity.
*/ staticint punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
{ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
u64 maxsize = sdp->sd_heightsize[ip->i_height]; struct metapath mp = {}; struct buffer_head *dibh, *bh; struct gfs2_holder rd_gh; unsignedint bsize_shift = sdp->sd_sb.sb_bsize_shift; unsignedint bsize = 1 << bsize_shift;
u64 lblock = (offset + bsize - 1) >> bsize_shift;
__u16 start_list[GFS2_MAX_META_HEIGHT];
__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsignedint start_aligned, end_aligned; unsignedint strip_h = ip->i_height - 1;
u32 btotal = 0; int ret, state; int mp_h; /* metapath buffers are read in to this height */
u64 prev_bnr = 0;
__be64 *start, *end;
if (offset + bsize - 1 >= maxsize) { /* * The starting point lies beyond the allocated metadata; * there are no blocks to deallocate.
*/ return 0;
}
/* * The start position of the hole is defined by lblock, start_list, and * start_aligned. The end position of the hole is defined by lend, * end_list, and end_aligned. * * start_aligned and end_aligned define down to which height the start * and end positions are aligned to the metadata tree (i.e., the * position is a multiple of the metadata granularity at the height * above). This determines at which heights additional meta pointers * needs to be preserved for the remaining data.
*/
/* * Clip the end at the maximum file size for the given height: * that's how far the metadata goes; files bigger than that * will have additional layers of indirection.
*/ if (end_offset > maxsize)
end_offset = maxsize;
lend = end_offset >> bsize_shift;
if (mp.mp_aheight == ip->i_height)
state = DEALLOC_MP_FULL; /* We have a complete metapath */ else
state = DEALLOC_FILL_MP; /* deal with partial metapath */
ret = gfs2_rindex_update(sdp); if (ret) goto out_metapath;
ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) goto out_metapath;
gfs2_holder_mark_uninitialized(&rd_gh);
mp_h = strip_h;
while (state != DEALLOC_DONE) { switch (state) { /* Truncate a full metapath at the given strip height.
* Note that strip_h == mp_h in order to be in this state. */ case DEALLOC_MP_FULL:
bh = mp.mp_bh[mp_h];
gfs2_assert_withdraw(sdp, bh); if (gfs2_assert_withdraw(sdp,
prev_bnr != bh->b_blocknr)) {
fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, " "s_h:%u, mp_h:%u\n",
(unsignedlonglong)ip->i_no_addr,
prev_bnr, ip->i_height, strip_h, mp_h);
}
prev_bnr = bh->b_blocknr;
if (gfs2_metatype_check(sdp, bh,
(mp_h ? GFS2_METATYPE_IN :
GFS2_METATYPE_DI))) {
ret = -EIO; goto out;
}
/* * Below, passing end_aligned as 0 gives us the * metapointer range excluding the end point: the end * point is the first metapath we must not deallocate!
*/
/* If we hit an error or just swept dinode buffer,
just exit. */ if (ret || !mp_h) {
state = DEALLOC_DONE; break;
}
state = DEALLOC_MP_LOWER; break;
/* lower the metapath strip height */ case DEALLOC_MP_LOWER: /* We're done with the current buffer, so release it, unless it's the dinode buffer. Then back up to the
previous pointer. */ if (mp_h) {
brelse(mp.mp_bh[mp_h]);
mp.mp_bh[mp_h] = NULL;
} /* If we can't get any lower in height, we've stripped off all we can. Next step is to back up and start
stripping the previous level of metadata. */ if (mp_h == 0) {
strip_h--;
memcpy(mp.mp_list, start_list, sizeof(start_list));
mp_h = strip_h;
state = DEALLOC_FILL_MP; break;
}
mp.mp_list[mp_h] = 0;
mp_h--; /* search one metadata height down */
mp.mp_list[mp_h]++; if (walk_done(sdp, &mp, mp_h, end_list, end_aligned)) break; /* Here we've found a part of the metapath that is not * allocated. We need to search at that height for the
* next non-null pointer. */ if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
state = DEALLOC_FILL_MP;
mp_h++;
} /* No more non-null pointers at this height. Back up
to the previous height and try again. */ break; /* loop around in the same state */
/* Fill the metapath with buffers to the given height. */ case DEALLOC_FILL_MP: /* Fill the buffers out to the current height. */
ret = fillup_metapath(ip, &mp, mp_h); if (ret < 0) goto out;
/* On the first pass, issue read-ahead on metadata. */ if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) { unsignedint height = mp.mp_aheight - 1;
/* No read-ahead for data blocks. */ if (mp.mp_aheight - 1 == strip_h)
height--;
/* If buffers found for the entire strip height */ if (mp.mp_aheight - 1 == strip_h) {
state = DEALLOC_MP_FULL; break;
} if (mp.mp_aheight < ip->i_height) /* We have a partial height */
mp_h = mp.mp_aheight - 1;
/* If we find a non-null block pointer, crawl a bit higher up in the metapath and try again, otherwise
we need to look lower for a new starting point. */ if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
mp_h++; else
state = DEALLOC_MP_LOWER; break;
}
}
if (btotal) { if (current->journal_info == NULL) {
ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
RES_QUOTA, 0); if (ret) goto out;
down_write(&ip->i_rw_mutex);
}
gfs2_statfs_change(sdp, 0, +btotal, 0);
gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
ip->i_inode.i_gid);
inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
up_write(&ip->i_rw_mutex);
gfs2_trans_end(sdp);
}
out: if (gfs2_holder_initialized(&rd_gh))
gfs2_glock_dq_uninit(&rd_gh); if (current->journal_info) {
up_write(&ip->i_rw_mutex);
gfs2_trans_end(sdp);
cond_resched();
}
gfs2_quota_unhold(ip);
out_metapath:
release_metapath(&mp); return ret;
}
/**
 * do_shrink - make a file smaller
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * Called with an exclusive lock on @inode. @newsize must
 * be equal to or smaller than the current inode size.
 *
 * Returns: errno
 */
/** * do_grow - Touch and update inode size * @inode: The inode * @size: The new size * * This function updates the timestamps on the inode and * may also increase the size of the inode. This function * must not be called with @size any smaller than the current * inode size. * * Although it is not strictly required to unstuff files here, * earlier versions of GFS2 have a bug in the stuffed file reading * code which will result in a buffer overrun if the size is larger * than the max stuffed file size. In order to prevent this from * occurring, such files are unstuffed, but in other cases we can * just update the inode size directly. * * Returns: 0 on success, or -ve on error
*/
/** * gfs2_setattr_size - make a file a given size * @inode: the inode * @newsize: the size to make the file * * The file size can grow, shrink, or stay the same size. This * is called holding i_rwsem and an exclusive glock on the inode * in question. * * Returns: errno
*/
int gfs2_setattr_size(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int error;

	/* Only regular files can be resized through this path. */
	BUG_ON(!S_ISREG(inode->i_mode));

	error = inode_newsize_ok(inode, newsize);
	if (error)
		return error;

	inode_dio_wait(inode);

	error = gfs2_qa_get(ip);
	if (!error) {
		/* An unchanged size is handled by the grow path. */
		if (newsize >= inode->i_size)
			error = do_grow(inode, newsize);
		else
			error = do_shrink(inode, newsize);
	}

	/* This cleanup also runs when gfs2_qa_get() failed. */
	gfs2_rs_delete(ip);
	gfs2_qa_put(ip);
	return error;
}
/*
 * gfs2_truncatei_resume - deallocate everything from the current inode size
 * onwards, then finish the truncate
 * @ip: The inode
 *
 * Returns: 0 on success, or the error from punch_hole() or trunc_end()
 */
int gfs2_truncatei_resume(struct gfs2_inode *ip)
{
	int ret = punch_hole(ip, i_size_read(&ip->i_inode), 0);

	if (ret)
		return ret;
	return trunc_end(ip);
}
int gfs2_file_dealloc(struct gfs2_inode *ip)
{ return punch_hole(ip, 0, 0);
}
/** * gfs2_free_journal_extents - Free cached journal bmap info * @jd: The journal *
*/
/** * gfs2_add_jextent - Add or merge a new extent to extent cache * @jd: The journal descriptor * @lblock: The logical block at start of new extent * @dblock: The physical block at start of new extent * @blocks: Size of extent in fs blocks * * Returns: 0 on success or -ENOMEM
*/
/** * gfs2_map_journal_extents - Cache journal bmap info * @sdp: The super block * @jd: The journal to map * * Create a reusable "extent" mapping from all logical * blocks to all physical blocks for the given journal. This will save * us time when writing journal blocks. Most journals will have only one * extent that maps all their logical blocks. That's because gfs2.mkfs * arranges the journal blocks sequentially to maximize performance. * So the extent would map the first block for the entire file length. * However, gfs2_jadd can happen while file activity is happening, so * those journals may not be sequential. Less likely is the case where * the users created their own journals by mounting the metafs and * laying it out. But it's still possible. These journals might have * several extents. * * Returns: 0 on success, or error on failure
*/
/** * gfs2_write_alloc_required - figure out if a write will require an allocation * @ip: the file being written to * @offset: the offset to write to * @len: the number of bytes being written * * Returns: 1 if an alloc is required, 0 otherwise
*/
/*
 * The information on this web page was carefully compiled to the best of
 * our knowledge. However, neither the completeness, nor the correctness,
 * nor the quality of the information provided is guaranteed.
 *
 * Note:
 * The syntax highlighting and the measurement are still experimental.
 */