/*
 * Set parameters for inode allocation heuristics, taking into account
 * filesystem size and inode32/inode64 mount options; i.e. specifically
 * whether or not XFS_FEAT_SMALL_INUMS is set.
 *
 * Inode allocation patterns are altered only if inode32 is requested
 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
 * If altered, XFS_OPSTATE_INODE32 is set as well.
 *
 * An agcount independent of that in the mount structure is provided
 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 * to the potentially higher ag count.
 *
 * Returns the maximum AG index which may contain inodes.
 *
 * NOTE(review): this block is truncated in the source view -- the per-AG
 * loop at the end is cut off mid-body, so the rest of the function (and
 * its return statement) is not visible here.
 */
xfs_agnumber_t
xfs_set_inode_alloc( struct xfs_mount *mp,
xfs_agnumber_t agcount)
{
xfs_agnumber_t index;
xfs_agnumber_t maxagi = 0;
xfs_sb_t *sbp = &mp->m_sb;
xfs_agnumber_t max_metadata;
xfs_agino_t agino;
xfs_ino_t ino;
/*
 * Calculate how much should be reserved for inodes to meet
 * the max inode percentage. Used only for inode32.
 */
if (M_IGEO(mp)->maxicount) {
uint64_t icount;
/* Get the last possible inode in the filesystem */
agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
/*
 * If user asked for no more than 32-bit inodes, and the fs is
 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
 * the allocator to accommodate the request.
 */
if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
xfs_set_inode32(mp); else
xfs_clear_inode32(mp);
for (index = 0; index < agcount; index++) { struct xfs_perag *pag;
/*
 * Verify that DAX can actually be used on this filesystem, disabling it
 * (or rejecting the mount) when the devices cannot support it.
 *
 * NOTE(review): truncated in the source view -- the success return and
 * the disable_dax: label targeted by the gotos below are not visible
 * here. "staticint" is a pre-existing token-join artifact from
 * extraction; the code is left byte-identical.
 */
staticint
xfs_setup_dax_always( struct xfs_mount *mp)
{ if (!mp->m_ddev_targp->bt_daxdev &&
(!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
xfs_alert(mp, "DAX unsupported by block device. Turning off DAX."); goto disable_dax;
}
/* DAX only works when the fs block size matches the system page size. */
if (mp->m_super->s_blocksize != PAGE_SIZE) {
xfs_alert(mp, "DAX not supported for blocksize. Turning off DAX."); goto disable_dax;
}
/* DAX plus reflink is rejected on a partitioned data device. */
if (xfs_has_reflink(mp) &&
bdev_is_partition(mp->m_ddev_targp->bt_bdev)) {
xfs_alert(mp, "DAX and reflink cannot work with multi-partitions!"); return -EINVAL;
}
STATICvoid
xfs_shutdown_devices( struct xfs_mount *mp)
{ /* * Udev is triggered whenever anyone closes a block device or unmounts * a file systemm on a block device. * The default udev rules invoke blkid to read the fs super and create * symlinks to the bdev under /dev/disk. For this, it uses buffered * reads through the page cache. * * xfs_db also uses buffered reads to examine metadata. There is no * coordination between xfs_db and udev, which means that they can run * concurrently. Note there is no coordination between the kernel and * blkid either. * * On a system with 64k pages, the page cache can cache the superblock * and the root inode (and hence the root directory) with the same 64k * page. If udev spawns blkid after the mkfs and the system is busy * enough that it is still running when xfs_db starts up, they'll both * read from the same page in the pagecache. * * The unmount writes updated inode metadata to disk directly. The XFS * buffer cache does not use the bdev pagecache, so it needs to * invalidate that pagecache on unmount. If the above scenario occurs, * the pagecache no longer reflects what's on disk, xfs_db reads the * stale metadata, and fails to find /a. Most of the time this succeeds * because closing a bdev invalidates the page cache, but when processes * race, everyone loses.
*/ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
invalidate_bdev(mp->m_logdev_targp->bt_bdev);
} if (mp->m_rtdev_targp) {
blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
}
blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
invalidate_bdev(mp->m_ddev_targp->bt_bdev);
}
/* * The file system configurations are: * (1) device (partition) with data and internal log * (2) logical volume with data and log subvolumes. * (3) logical volume with data, log, and realtime subvolumes. * * We only have to handle opening the log and realtime volumes here if * they are present. The data subvolume has already been opened by * get_sb_bdev() and is stored in sb->s_bdev.
*/ STATICint
xfs_open_devices( struct xfs_mount *mp)
{ struct super_block *sb = mp->m_super; struct block_device *ddev = sb->s_bdev; struct file *logdev_file = NULL, *rtdev_file = NULL; int error;
/* * Open real time and log devices - order is important.
*/ if (mp->m_logname) {
error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file); if (error) return error;
}
if (mp->m_rtname) {
error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file); if (error) goto out_close_logdev;
if (file_bdev(rtdev_file) == ddev ||
(logdev_file &&
file_bdev(rtdev_file) == file_bdev(logdev_file))) {
xfs_warn(mp, "Cannot mount filesystem with identical rtdev and ddev/logdev.");
error = -EINVAL; goto out_close_rtdev;
}
}
if (xfs_has_sector(mp))
log_sector_size = mp->m_sb.sb_logsectsize;
error = xfs_configure_buftarg(mp->m_logdev_targp,
log_sector_size); if (error) return error;
}
if (mp->m_sb.sb_rtstart) { if (mp->m_rtdev_targp) {
xfs_warn(mp, "can't use internal and external rtdev at the same time"); return -EINVAL;
}
mp->m_rtdev_targp = mp->m_ddev_targp;
} elseif (mp->m_rtname) {
error = xfs_configure_buftarg(mp->m_rtdev_targp,
mp->m_sb.sb_sectsize); if (error) return error;
}
if (down_read_trylock(&sb->s_umount)) {
sync_inodes_sb(sb);
up_read(&sb->s_umount);
}
}
/* * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting * for IO to complete so that we effectively throttle multiple callers to the * rate at which IO is completing.
*/ void
xfs_flush_inodes( struct xfs_mount *mp)
{ /* * If flush_work() returns true then that means we waited for a flush * which was already in progress. Don't bother running another scan.
*/ if (flush_work(&mp->m_flush_inodes_work)) return;
/* Catch misguided souls that try to use this interface on XFS */ STATICstruct inode *
xfs_fs_alloc_inode( struct super_block *sb)
{
BUG(); return NULL;
}
/* * Now that the generic code is guaranteed not to be accessing * the linux inode, we can inactivate and reclaim the inode.
*/ STATICvoid
xfs_fs_destroy_inode( struct inode *inode)
{ struct xfs_inode *ip = XFS_I(inode);
if (!(inode->i_sb->s_flags & SB_LAZYTIME)) return;
/* * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC) * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed * in flags possibly together with I_DIRTY_SYNC.
*/ if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME)) return;
/* * Slab object creation initialisation for the XFS inode. * This covers only the idempotent fields in the XFS inode; * all other fields need to be initialised on allocation * from the slab. This avoids the need to repeatedly initialise * fields in the xfs inode that left in the initialise state * when freeing the inode.
*/ STATICvoid
xfs_fs_inode_init_once( void *inode)
{ struct xfs_inode *ip = inode;
/* * We do an unlocked check for XFS_IDONTCACHE here because we are already * serialised against cache hits here via the inode->i_lock and igrab() in * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be * racing with us, and it avoids needing to grab a spinlock here for every inode * we drop the final reference on.
*/ STATICint
xfs_fs_drop_inode( struct inode *inode)
{ struct xfs_inode *ip = XFS_I(inode);
/* * If this unlinked inode is in the middle of recovery, don't * drop the inode just yet; log recovery will take care of * that. See the comment for this inode flag.
*/ if (ip->i_flags & XFS_IRECOVERY) {
ASSERT(xlog_recovery_needed(ip->i_mount->m_log)); return 0;
}
return generic_drop_inode(inode);
}
STATICvoid
xfs_fs_evict_inode( struct inode *inode)
{ if (IS_DAX(inode))
dax_break_layout_final(inode);
STATICint
xfs_fs_sync_fs( struct super_block *sb, int wait)
{ struct xfs_mount *mp = XFS_M(sb); int error;
trace_xfs_fs_sync_fs(mp, __return_address);
/* * Doing anything during the async pass would be counterproductive.
*/ if (!wait) return 0;
error = xfs_log_force(mp, XFS_LOG_SYNC); if (error) return error;
if (laptop_mode) { /* * The disk must be active because we're syncing. * We schedule log work now (now that the disk is * active) instead of later (when it might not be).
*/
flush_delayed_work(&mp->m_log->l_work);
}
/* * If we are called with page faults frozen out, it means we are about * to freeze the transaction subsystem. Take the opportunity to shut * down inodegc because once SB_FREEZE_FS is set it's too late to * prevent inactivation races with freeze. The fs doesn't get called * again by the freezing process until after SB_FREEZE_FS has been set, * so it's now or never. Same logic applies to speculative allocation * garbage collection. * * We don't care if this is a normal syncfs call that does this or * freeze that does this - we can run this multiple times without issue * and we won't race with a restart because a restart can only occur * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
*/ if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
xfs_inodegc_stop(mp);
xfs_blockgc_stop(mp);
xfs_zone_gc_stop(mp);
}
/* make sure st->f_bfree does not underflow */
st->f_bfree = max(0LL,
fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS));
/* * sb_dblocks can change during growfs, but nothing cares about reporting * the old or new value during growfs.
*/
st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp);
}
/*
 * When stat(v)fs is called on a file with the realtime bit set or a directory
 * with the rtinherit bit, report freespace information for the RT device
 * instead of the main data device.
 */
static void
xfs_statfs_rt(
	struct xfs_mount	*mp,
	struct kstatfs		*st)
{
	/* Free blocks: sum of the free RT extent counters, in fs blocks. */
	st->f_bfree = xfs_rtbxlen_to_blen(mp,
			xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	/* Total blocks: RT device size minus the reserved RT extents. */
	st->f_blocks = mp->m_sb.sb_rblocks - xfs_rtbxlen_to_blen(mp,
			mp->m_free[XC_FREE_RTEXTENTS].res_total);
}
/* * Expedite background inodegc but don't wait. We do not want to block * here waiting hours for a billion extent file to be truncated.
*/
xfs_inodegc_push(mp);
for (i = 0; i < XC_FREE_NR; i++) { if (mp->m_free[i].res_saved) {
resblks = mp->m_free[i].res_saved;
mp->m_free[i].res_saved = 0;
} else
resblks = xfs_default_resblks(mp, i);
xfs_reserve_blocks(mp, i, resblks);
}
}
/*
 * Second stage of a freeze. The data is already frozen so we only
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
 *
 * NOTE(review): this span looks like two functions merged by extraction:
 * the error-path gc restart is followed by a second, unconditional gc
 * restart block whose comment describes thaw semantics, and the function
 * returns 0 instead of ret (the result of xfs_log_quiesce() is dropped).
 * "STATICint" and "unsignedint" are pre-existing token-join artifacts.
 * Verify against the upstream xfs_fs_freeze()/xfs_fs_unfreeze() pair
 * before relying on this body; code is left byte-identical here.
 */
STATICint
xfs_fs_freeze( struct super_block *sb)
{ struct xfs_mount *mp = XFS_M(sb); unsignedint flags; int ret;
/*
 * The filesystem is now frozen far enough that memory reclaim
 * cannot safely operate on the filesystem. Hence we need to
 * set a GFP_NOFS context here to avoid recursion deadlocks.
 */
flags = memalloc_nofs_save();
xfs_save_resvblks(mp);
ret = xfs_log_quiesce(mp);
memalloc_nofs_restore(flags);
/*
 * For read-write filesystems, we need to restart the inodegc on error
 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
 * going to be run to restart it now. We are at SB_FREEZE_FS level
 * here, so we can restart safely without racing with a stop in
 * xfs_fs_sync_fs().
 */
if (ret && !xfs_is_readonly(mp)) {
xfs_blockgc_start(mp);
xfs_inodegc_start(mp);
xfs_zone_gc_start(mp);
}
/*
 * Don't reactivate the inodegc worker on a readonly filesystem because
 * inodes are sent directly to reclaim. Don't reactivate the blockgc
 * worker because there are no speculative preallocations on a readonly
 * filesystem.
 */
if (!xfs_is_readonly(mp)) {
xfs_zone_gc_start(mp);
xfs_blockgc_start(mp);
xfs_inodegc_start(mp);
}
return 0;
}
/* * This function fills in xfs_mount_t fields based on mount args. * Note: the superblock _has_ now been read in.
*/ STATICint
xfs_finish_flags( struct xfs_mount *mp)
{ /* Fail a mount where the logbuf is smaller than the log stripe */ if (xfs_has_logv2(mp)) { if (mp->m_logbsize <= 0 &&
mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
mp->m_logbsize = mp->m_sb.sb_logsunit;
} elseif (mp->m_logbsize > 0 &&
mp->m_logbsize < mp->m_sb.sb_logsunit) {
xfs_warn(mp, "logbuf size must be greater than or equal to log stripe size"); return -EINVAL;
}
} else { /* Fail a mount if the logbuf is larger than 32K */ if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
xfs_warn(mp, "logbuf size for version 1 logs must be 16K or 32K"); return -EINVAL;
}
}
/* * V5 filesystems always use attr2 format for attributes.
*/ if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. " "attr2 is always enabled for V5 filesystems."); return -EINVAL;
}
/* * prohibit r/w mounts of read-only filesystems
*/ if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
xfs_warn(mp, "cannot mount a read-only filesystem as read-write"); return -EROFS;
}
if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
(mp->m_qflags & XFS_PQUOTA_ACCT) &&
!xfs_has_pquotino(mp)) {
xfs_warn(mp, "Super block does not support project and group quota together"); return -EINVAL;
}
if (!xfs_has_zoned(mp)) { if (mp->m_max_open_zones) {
xfs_warn(mp, "max_open_zones mount option only supported on zoned file systems."); return -EINVAL;
} if (mp->m_features & XFS_FEAT_NOLIFETIME) {
xfs_warn(mp, "nolifetime mount option only supported on zoned file systems."); return -EINVAL;
}
}
return 0;
}
/*
 * Initialise the per-cpu counters used for inode and free-space
 * accounting on this mount.
 *
 * NOTE(review): truncated in the source view -- the success return and
 * the free_icount/free_ifree/free_delalloc/free_freecounters unwind
 * labels targeted by the gotos are not visible here; the text following
 * this block belongs to a different function. "staticint" is a
 * pre-existing token-join artifact; code is left byte-identical.
 */
staticint
xfs_init_percpu_counters( struct xfs_mount *mp)
{ int error; int i;
error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); if (error) return -ENOMEM;
error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL); if (error) goto free_icount;
error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL); if (error) goto free_ifree;
error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL); if (error) goto free_delalloc;
/* One free-space counter per free-space class. */
for (i = 0; i < XC_FREE_NR; i++) {
error = percpu_counter_init(&mp->m_free[i].count, 0,
GFP_KERNEL); if (error) goto free_freecounters;
}
if (kstrtoull(value, base, &_res))
ret = -EINVAL;
kfree(value);
*res = _res << shift_left_factor; return ret;
}
static inline void
xfs_fs_warn_deprecated(
	struct fs_context	*fc,
	struct fs_parameter	*param)
{
	/*
	 * Always warn about someone passing in a deprecated mount option.
	 * Previously we wouldn't print the warning if we were reconfiguring
	 * and current mount point already had the flag set, but that was not
	 * the right thing to do.
	 *
	 * Many distributions mount the root filesystem with no options in the
	 * initramfs and rely on mount -a to remount the root fs with the
	 * options in fstab. However, the old behavior meant that there would
	 * never be a warning about deprecated mount options for the root fs in
	 * /etc/fstab. On a single-fs system, that means no warning at all.
	 *
	 * Compounding this problem are distribution scripts that copy
	 * /proc/mounts to fstab, which means that we can't remove mount
	 * options unless we're 100% sure they have only ever been advertised
	 * in /proc/mounts in response to explicitly provided mount options.
	 */
	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
}
/* * Set mount state from a mount option. * * NOTE: mp->m_super is NULL here!
*/ staticint
xfs_fs_parse_param( struct fs_context *fc, struct fs_parameter *param)
{ struct xfs_mount *parsing_mp = fc->s_fs_info; struct fs_parse_result result; int size = 0; int opt;
switch (opt) { case Opt_logbufs:
parsing_mp->m_logbufs = result.uint_32; return 0; case Opt_logbsize: if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize)) return -EINVAL; return 0; case Opt_logdev:
kfree(parsing_mp->m_logname);
parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL); if (!parsing_mp->m_logname) return -ENOMEM; return 0; case Opt_rtdev:
kfree(parsing_mp->m_rtname);
parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL); if (!parsing_mp->m_rtname) return -ENOMEM; return 0; case Opt_allocsize: if (suffix_kstrtoint(param->string, 10, &size)) return -EINVAL;
parsing_mp->m_allocsize_log = ffs(size) - 1;
parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE; return 0; case Opt_grpid: case Opt_bsdgroups:
parsing_mp->m_features |= XFS_FEAT_GRPID; return 0; case Opt_nogrpid: case Opt_sysvgroups:
parsing_mp->m_features &= ~XFS_FEAT_GRPID; return 0; case Opt_wsync:
parsing_mp->m_features |= XFS_FEAT_WSYNC; return 0; case Opt_norecovery:
parsing_mp->m_features |= XFS_FEAT_NORECOVERY; return 0; case Opt_noalign:
parsing_mp->m_features |= XFS_FEAT_NOALIGN; return 0; case Opt_swalloc:
parsing_mp->m_features |= XFS_FEAT_SWALLOC; return 0; case Opt_sunit:
parsing_mp->m_dalign = result.uint_32; return 0; case Opt_swidth:
parsing_mp->m_swidth = result.uint_32; return 0; case Opt_inode32:
parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS; return 0; case Opt_inode64:
parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS; return 0; case Opt_nouuid:
parsing_mp->m_features |= XFS_FEAT_NOUUID; return 0; case Opt_largeio:
parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE; return 0; case Opt_nolargeio:
parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE; return 0; case Opt_filestreams:
parsing_mp->m_features |= XFS_FEAT_FILESTREAMS; return 0; case Opt_noquota:
parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_quota: case Opt_uquota: case Opt_usrquota:
parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_qnoenforce: case Opt_uqnoenforce:
parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_pquota: case Opt_prjquota:
parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_pqnoenforce:
parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_gquota: case Opt_grpquota:
parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_gqnoenforce:
parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_discard:
parsing_mp->m_features |= XFS_FEAT_DISCARD; return 0; case Opt_nodiscard:
parsing_mp->m_features &= ~XFS_FEAT_DISCARD; return 0; #ifdef CONFIG_FS_DAX case Opt_dax:
xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS); return 0; case Opt_dax_enum:
xfs_mount_set_dax_mode(parsing_mp, result.uint_32); return 0; #endif /* Following mount options will be removed in September 2025 */ case Opt_ikeep:
xfs_fs_warn_deprecated(fc, param);
parsing_mp->m_features |= XFS_FEAT_IKEEP; return 0; case Opt_noikeep:
xfs_fs_warn_deprecated(fc, param);
parsing_mp->m_features &= ~XFS_FEAT_IKEEP; return 0; case Opt_attr2:
xfs_fs_warn_deprecated(fc, param);
parsing_mp->m_features |= XFS_FEAT_ATTR2; return 0; case Opt_noattr2:
xfs_fs_warn_deprecated(fc, param);
parsing_mp->m_features |= XFS_FEAT_NOATTR2; return 0; case Opt_max_open_zones:
parsing_mp->m_max_open_zones = result.uint_32; return 0; case Opt_lifetime:
parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME; return 0; case Opt_nolifetime:
parsing_mp->m_features |= XFS_FEAT_NOLIFETIME; return 0; case Opt_max_atomic_write: if (suffix_kstrtoull(param->string, 10,
&parsing_mp->m_awu_max_bytes)) {
xfs_warn(parsing_mp, "max atomic write size must be positive integer"); return -EINVAL;
} return 0; default:
xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); return -EINVAL;
}
return 0;
}
/*
 * Sanity check the mount options before the superblock has been read,
 * rejecting combinations that can never be valid.
 *
 * NOTE(review): truncated in the source view -- the final "return 0;"
 * and closing brace are not visible; the text that follows belongs to a
 * different function. "staticint" is a pre-existing token-join artifact;
 * code is left byte-identical.
 */
staticint
xfs_fs_validate_params( struct xfs_mount *mp)
{
/* No recovery flag requires a read-only mount */
if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
xfs_warn(mp, "no-recovery mounts must be read-only."); return -EINVAL;
}
/*
 * We have not read the superblock at this point, so only the attr2
 * mount option can set the attr2 feature by this stage.
 */
if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
xfs_warn(mp, "attr2 and noattr2 cannot both be specified."); return -EINVAL;
}
/* noalign makes explicit stripe geometry meaningless. */
if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
xfs_warn(mp, "sunit and swidth options incompatible with the noalign option"); return -EINVAL;
}
/* Quota mount options require quota support compiled into the kernel. */
if (!IS_ENABLED(CONFIG_XFS_QUOTA) &&
(mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) {
xfs_warn(mp, "quota support not available in this kernel."); return -EINVAL;
}
/* sunit and swidth only make sense as a pair. */
if ((mp->m_dalign && !mp->m_swidth) ||
(!mp->m_dalign && mp->m_swidth)) {
xfs_warn(mp, "sunit and swidth must be specified together"); return -EINVAL;
}
/* The stripe width must be an integer multiple of the stripe unit. */
if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
xfs_warn(mp, "stripe width (%d) must be a multiple of the stripe unit (%d)",
mp->m_swidth, mp->m_dalign); return -EINVAL;
}
/* * Copy VFS mount flags from the context now that all parameter parsing * is guaranteed to have been completed by either the old mount API or * the newer fsopen/fsconfig API.
*/ if (fc->sb_flags & SB_RDONLY)
xfs_set_readonly(mp); if (fc->sb_flags & SB_DIRSYNC)
mp->m_features |= XFS_FEAT_DIRSYNC; if (fc->sb_flags & SB_SYNCHRONOUS)
mp->m_features |= XFS_FEAT_WSYNC;
error = xfs_fs_validate_params(mp); if (error) return error;
/* * Delay mount work if the debug hook is set. This is debug * instrumention to coordinate simulation of xfs mount failures with * VFS superblock operations
*/ if (xfs_globals.mount_delay) {
xfs_notice(mp, "Delaying mount for %d seconds.",
xfs_globals.mount_delay);
msleep(xfs_globals.mount_delay * 1000);
}
if (fc->sb_flags & SB_SILENT)
flags |= XFS_MFSI_QUIET;
error = xfs_open_devices(mp); if (error) return error;
error = xfs_init_mount_workqueues(mp); if (error) goto out_shutdown_devices;
error = xfs_init_percpu_counters(mp); if (error) goto out_destroy_workqueues;
error = xfs_inodegc_init_percpu(mp); if (error) goto out_destroy_counters;
/* Allocate stats memory before we do operations that might use it */
mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); if (!mp->m_stats.xs_stats) {
error = -ENOMEM; goto out_destroy_inodegc;
}
error = xchk_mount_stats_alloc(mp); if (error) goto out_free_stats;
error = xfs_readsb(mp, flags); if (error) goto out_free_scrub_stats;
error = xfs_finish_flags(mp); if (error) goto out_free_sb;
error = xfs_setup_devices(mp); if (error) goto out_free_sb;
/* * V4 support is undergoing deprecation. * * Note: this has to use an open coded m_features check as xfs_has_crc * always returns false for !CONFIG_XFS_SUPPORT_V4.
*/ if (!(mp->m_features & XFS_FEAT_CRC)) { if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) {
xfs_warn(mp, "Deprecated V4 format (crc=0) not supported by kernel.");
error = -EINVAL; goto out_free_sb;
}
xfs_warn_once(mp, "Deprecated V4 format (crc=0) will not be supported after September 2030.");
}
/* ASCII case insensitivity is undergoing deprecation. */ if (xfs_has_asciici(mp)) { #ifdef CONFIG_XFS_SUPPORT_ASCII_CI
xfs_warn_once(mp, "Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030."); #else
xfs_warn(mp, "Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel.");
error = -EINVAL; goto out_free_sb; #endif
}
/* * Filesystem claims it needs repair, so refuse the mount unless * norecovery is also specified, in which case the filesystem can * be mounted with no risk of further damage.
*/ if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) {
xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair.");
error = -EFSCORRUPTED; goto out_free_sb;
}
/* * Don't touch the filesystem if a user tool thinks it owns the primary * superblock. mkfs doesn't clear the flag from secondary supers, so * we don't check them at all.
*/ if (mp->m_sb.sb_inprogress) {
xfs_warn(mp, "Offline file system operation in progress!");
error = -EFSCORRUPTED; goto out_free_sb;
}
if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
size_t max_folio_size = mapping_max_folio_size_supported();
if (!xfs_has_crc(mp)) {
xfs_warn(mp, "V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.",
mp->m_sb.sb_blocksize, PAGE_SIZE);
error = -ENOSYS; goto out_free_sb;
}
if (mp->m_sb.sb_blocksize > max_folio_size) {
xfs_warn(mp, "block size (%u bytes) not supported; Only block size (%zu) or less is supported",
mp->m_sb.sb_blocksize, max_folio_size);
error = -ENOSYS; goto out_free_sb;
}
/* Ensure this filesystem fits in the page cache limits */ if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
xfs_warn(mp, "file system too large to be mounted on this system.");
error = -EFBIG; goto out_free_sb;
}
/* * XFS block mappings use 54 bits to store the logical block offset. * This should suffice to handle the maximum file size that the VFS * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON * to check this assertion. * * Avoid integer overflow by comparing the maximum bmbt offset to the * maximum pagecache offset in units of fs blocks.
*/ if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
xfs_warn(mp, "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
XFS_MAX_FILEOFF);
error = -EINVAL; goto out_free_sb;
}
error = xfs_rtmount_readsb(mp); if (error) goto out_free_sb;
error = xfs_filestream_mount(mp); if (error) goto out_free_rtsb;
/* * we must configure the block size in the superblock before we run the * full mount process as the mount process can lookup and cache inodes.
*/
sb->s_magic = XFS_SUPER_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_max_links = XFS_MAXLINK;
sb->s_time_gran = 1; if (xfs_has_bigtime(mp)) {
sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
} else {
sb->s_time_min = XFS_LEGACY_TIME_MIN;
sb->s_time_max = XFS_LEGACY_TIME_MAX;
}
trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;
set_posix_acl_flag(sb);
/* version 5 superblocks support inode version counters. */ if (xfs_has_crc(mp))
sb->s_flags |= SB_I_VERSION;
if (xfs_has_dax_always(mp)) {
error = xfs_setup_dax_always(mp); if (error) goto out_filestream_unmount;
}
if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
xfs_warn(mp, "mounting with \"discard\" option, but the device does not support discard");
mp->m_features &= ~XFS_FEAT_DISCARD;
}
if (xfs_has_zoned(mp)) { if (!xfs_has_metadir(mp)) {
xfs_alert(mp, "metadir feature required for zoned realtime devices.");
error = -EINVAL; goto out_filestream_unmount;
}
xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED);
} elseif (xfs_has_metadir(mp)) {
xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR);
}
if (xfs_has_reflink(mp)) { if (xfs_has_realtime(mp) &&
!xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) {
xfs_alert(mp, "reflink not compatible with realtime extent size %u!",
mp->m_sb.sb_rextsize);
error = -EINVAL; goto out_filestream_unmount;
}
if (xfs_has_zoned(mp)) {
xfs_alert(mp, "reflink not compatible with zoned RT device!");
error = -EINVAL; goto out_filestream_unmount;
}
/* * If no quota mount options were provided, maybe we'll try to pick * up the quota accounting and enforcement flags from the ondisk sb.
*/ if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS))
xfs_set_resuming_quotaon(mp);
mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
error = xfs_mountfs(mp); if (error) goto out_filestream_unmount;
/* * If this is the first remount to writeable state we might have some * superblock changes to update.
*/ if (mp->m_update_sb) {
error = xfs_sync_sb(mp, false); if (error) {
xfs_warn(mp, "failed to write sb changes"); return error;
}
mp->m_update_sb = false;
}
/* * Fill out the reserve pool if it is empty. Use the stashed value if * it is non-zero, otherwise go with the default.
*/
xfs_restore_resvblks(mp);
xfs_log_work_queue(mp);
xfs_blockgc_start(mp);
/* Create the per-AG metadata reservation pool .*/
error = xfs_fs_reserve_ag_blocks(mp); if (error && error != -ENOSPC) return error;
/* Re-enable the background inode inactivation worker. */
xfs_inodegc_start(mp);
/* Flush all the dirty data to disk. */
error = sync_filesystem(mp->m_super); if (error) return error;
/* * Cancel background eofb scanning so it cannot race with the final * log force+buftarg wait and deadlock the remount.
*/
xfs_blockgc_stop(mp);
/* * Clear out all remaining COW staging extents and speculative post-EOF * preallocations so that we don't leave inodes requiring inactivation * cleanups during reclaim on a read-only mount. We must process every * cached inode, so this requires a synchronous cache scan.
*/
error = xfs_blockgc_free_space(mp, &icw); if (error) {
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error;
}
/* * Stop the inodegc background worker. xfs_fs_reconfigure already * flushed all pending inodegc work when it sync'd the filesystem. * The VFS holds s_umount, so we know that inodes cannot enter * xfs_fs_destroy_inode during a remount operation. In readonly mode * we send inodes straight to reclaim, so no inodes will be queued.
*/
xfs_inodegc_stop(mp);
/* Stop zone reclaim */
xfs_zone_gc_stop(mp);
/* Free the per-AG metadata reservation pool. */
xfs_fs_unreserve_ag_blocks(mp);
/* * Before we sync the metadata, we need to free up the reserve block * pool so that the used block count in the superblock on disk is * correct at the end of the remount. Stash the current* reserve pool * size so that if we get remounted rw, we can return it to the same * size.
*/
xfs_save_resvblks(mp);
xfs_log_clean(mp);
xfs_set_readonly(mp);
return 0;
}
/*
 * Logically we would return an error here to prevent users from believing
 * they might have changed mount options using remount which can't be changed.
 *
 * But unfortunately mount(8) adds all options from mtab and fstab to the mount
 * arguments in some cases so we can't blindly reject options, but have to
 * check for each specified option if it actually differs from the currently
 * set option and only reject it if that's the case.
 *
 * Until that is implemented we return success for every remount request, and
 * silently ignore all options that we can't actually change.
 *
 * NOTE(review): the tail of this span (the "if (mp) xfs_mount_free(mp);"
 * block and its comment) belongs to a different function that was merged
 * in during extraction; the true end of xfs_fs_reconfigure() is not
 * visible here. "staticint"/"elseif" are pre-existing token-join
 * artifacts; code is left byte-identical.
 */
staticint
xfs_fs_reconfigure( struct fs_context *fc)
{ struct xfs_mount *mp = XFS_M(fc->root->d_sb); struct xfs_mount *new_mp = fc->s_fs_info; int flags = fc->sb_flags; int error;
new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
/* version 5 superblocks always support version counters. */
if (xfs_has_crc(mp))
fc->sb_flags |= SB_I_VERSION;
error = xfs_fs_validate_params(new_mp); if (error) return error;
/* attr2 -> noattr2 */
if (xfs_has_noattr2(new_mp)) { if (xfs_has_crc(mp)) {
xfs_warn(mp, "attr2 is always enabled for a V5 filesystem - can't be changed."); return -EINVAL;
}
mp->m_features &= ~XFS_FEAT_ATTR2;
mp->m_features |= XFS_FEAT_NOATTR2;
} elseif (xfs_has_attr2(new_mp)) { /* noattr2 -> attr2 */
mp->m_features &= ~XFS_FEAT_NOATTR2;
mp->m_features |= XFS_FEAT_ATTR2;
}
/* Validate new max_atomic_write option before making other changes */
if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
error = xfs_set_max_atomic_write_opt(mp,
new_mp->m_awu_max_bytes); if (error) return error;
}
/*
 * Now that mp has been modified according to the remount options, we
 * do a final option validation with xfs_finish_flags() just like it is
 * done during mount. We cannot use xfs_finish_flags() on new_mp as it
 * contains only the user given options.
 */
error = xfs_finish_flags(mp); if (error) return error;
/* ro -> rw */
if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
error = xfs_remount_rw(mp); if (error) return error;
}
/* rw -> ro */
if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
error = xfs_remount_ro(mp); if (error) return error;
}
/*
 * mp is stored in the fs_context when it is initialized.
 * mp is transferred to the superblock on a successful mount,
 * but if an error occurs before the transfer we have to free
 * it here.
 */
if (mp)
xfs_mount_free(mp);
}
/* * WARNING: do not initialise any parameters in this function that depend on * mount option parsing having already been performed as this can be called from * fsopen() before any parameters have been set.
*/ staticint
xfs_init_fs_context( struct fs_context *fc)
{ struct xfs_mount *mp; int i;
spin_lock_init(&mp->m_sb_lock); for (i = 0; i < XG_TYPE_MAX; i++)
xa_init(&mp->m_groups[i].xa);
mutex_init(&mp->m_growlock);
mutex_init(&mp->m_metafile_resv_lock);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
mp->m_kobj.kobject.kset = xfs_kset; /* * We don't create the finobt per-ag space reservation until after log * recovery, so we must set this to true so that an ifree transaction * started during log recovery will not depend on space reservations * for finobt expansion.
*/
mp->m_finobt_nores = true;
/* * These can be overridden by the mount option parsing.
*/
mp->m_logbufs = -1;
mp->m_logbsize = -1;
mp->m_allocsize_log = 16; /* 64k */
/* * The size of the cache-allocated buf log item is the maximum * size possible under XFS. This wastes a little bit of memory, * but it is much faster.
*/
xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", sizeof(struct xfs_buf_log_item),
0, 0, NULL); if (!xfs_buf_item_cache) goto out_destroy_trans_cache;
/*
 * (Extraction residue -- German website disclaimer, not part of the XFS
 * source. Translation: "The information on this website has been compiled
 * carefully and to the best of our knowledge. However, neither the
 * completeness, nor the correctness, nor the quality of the information
 * provided is guaranteed. Note: the syntax highlighting and the
 * measurement are still experimental.")
 */