/* * A typo on the command line could possibly make the kernel run out of memory * and crash. To prevent the crash we account all used memory. We fail if we * exhaust 1/4 of all memory or 1/2 of vmalloc space.
*/ #define DM_STATS_MEMORY_FACTOR 4 #define DM_STATS_VMALLOC_FACTOR 2
a = shared_memory_amount + alloc_size; if (a < shared_memory_amount) returnfalse; if (a >> PAGE_SHIFT > totalram_pages() / DM_STATS_MEMORY_FACTOR) returnfalse; #ifdef CONFIG_MMU if (a > (VMALLOC_END - VMALLOC_START) / DM_STATS_VMALLOC_FACTOR) returnfalse; #endif returntrue;
}
s->n_histogram_entries = n_histogram_entries;
s->histogram_boundaries = kmemdup(histogram_boundaries,
s->n_histogram_entries * sizeof(unsignedlonglong), GFP_KERNEL); if (!s->histogram_boundaries) {
r = -ENOMEM; goto out;
}
s->program_id = kstrdup(program_id, GFP_KERNEL); if (!s->program_id) {
r = -ENOMEM; goto out;
}
s->aux_data = kstrdup(aux_data, GFP_KERNEL); if (!s->aux_data) {
r = -ENOMEM; goto out;
}
for (ni = 0; ni < n_entries; ni++) {
atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
cond_resched();
}
if (s->n_histogram_entries) { unsignedlonglong *hi;
hi = dm_kvzalloc(s->histogram_alloc_size, NUMA_NO_NODE); if (!hi) {
r = -ENOMEM; goto out;
} for (ni = 0; ni < n_entries; ni++) {
s->stat_shared[ni].tmp.histogram = hi;
hi += s->n_histogram_entries + 1;
cond_resched();
}
}
for_each_possible_cpu(cpu) {
p = dm_kvzalloc(percpu_alloc_size, cpu_to_node(cpu)); if (!p) {
r = -ENOMEM; goto out;
}
s->stat_percpu[cpu] = p; if (s->n_histogram_entries) { unsignedlonglong *hi;
hi = dm_kvzalloc(s->histogram_alloc_size, cpu_to_node(cpu)); if (!hi) {
r = -ENOMEM; goto out;
} for (ni = 0; ni < n_entries; ni++) {
p[ni].histogram = hi;
hi += s->n_histogram_entries + 1;
cond_resched();
}
}
}
/* * Suspend/resume to make sure there is no i/o in flight, * so that newly created statistics will be exact. * * (note: we couldn't suspend earlier because we must not * allocate memory while suspended)
*/
suspend_callback(md);
mutex_lock(&stats->mutex);
s->id = 0;
list_for_each(l, &stats->list) {
tmp_s = container_of(l, struct dm_stat, list_entry); if (WARN_ON(tmp_s->id < s->id)) {
r = -EINVAL; goto out_unlock_resume;
} if (tmp_s->id > s->id) break; if (unlikely(s->id == INT_MAX)) {
r = -ENFILE; goto out_unlock_resume;
}
s->id++;
}
ret_id = s->id;
list_add_tail_rcu(&s->list_entry, l);
dm_stats_recalc_precise_timestamps(stats);
if (!static_key_enabled(&stats_enabled.key))
static_branch_enable(&stats_enabled);
/* * For strict correctness we should use local_irq_save/restore * instead of preempt_disable/enable. * * preempt_disable/enable is racy if the driver finishes bios * from non-interrupt context as well as from interrupt context * or from more different interrupts. * * On 64-bit architectures the race only results in not counting some * events, so it is acceptable. On 32-bit architectures the race could * cause the counter going off by 2^32, so we need to do proper locking * there. * * part_stat_lock()/part_stat_unlock() have this race too.
*/ #if BITS_PER_LONG == 32 unsignedlong flags;
local_irq_save(flags); #else
preempt_disable(); #endif
p = &s->stat_percpu[smp_processor_id()][entry];
if (!end) { /* * A race condition can at worst result in the merged flag being * misrepresented, so we don't have to disable preemption here.
*/
last = raw_cpu_ptr(stats->last);
stats_aux->merged =
(bi_sector == (READ_ONCE(last->last_sector) &&
((bi_rw == WRITE) ==
(READ_ONCE(last->last_rw) == WRITE))
));
WRITE_ONCE(last->last_sector, end_sector);
WRITE_ONCE(last->last_rw, bi_rw);
} else
duration_jiffies = jiffies - start_time;
rcu_read_lock();
got_precise_time = false;
list_for_each_entry_rcu(s, &stats->list, list_entry) { if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) { /* start (!end) duration_ns is set by DM core's alloc_io() */ if (end)
stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns;
got_precise_time = true;
}
__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration_jiffies, stats_aux);
}
for (i = 0; i < s->n_histogram_entries + 1; i++) {
local_irq_disable();
p = &s->stat_percpu[smp_processor_id()][x];
p->histogram[i] -= shared->tmp.histogram[i];
local_irq_enable();
}
}
cond_resched();
}
}
/*
 * Reset the counters of the statistics region identified by @id.
 *
 * Returns 1 on success (the dm message convention for "handled, no
 * output"), or -ENOENT if no region with that id exists.
 *
 * stats->mutex serializes against concurrent create/delete/print of
 * regions; it is held across the clear so the region cannot vanish
 * while its shared counters are being zeroed.
 */
static int dm_stats_clear(struct dm_stats *stats, int id)
{
	struct dm_stat *s;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	/* Clear the whole region; init_tmp_percpu_totals == true. */
	__dm_stat_clear(s, 0, s->n_entries, true);

	mutex_unlock(&stats->mutex);

	return 1;
}
/* * This is like jiffies_to_msec, but works for 64-bit values.
*/ staticunsignedlonglong dm_jiffies_to_msec64(struct dm_stat *s, unsignedlonglong j)
{ unsignedlonglong result; unsignedint mult;
if (s->stat_flags & STAT_PRECISE_TIMESTAMPS) return j;
result = 0; if (j)
result = jiffies_to_msecs(j & 0x3fffff); if (j >= 1 << 22) {
mult = jiffies_to_msecs(1 << 22);
result += (unsignedlonglong)mult * (unsignedlonglong)jiffies_to_msecs((j >> 22) & 0x3fffff);
} if (j >= 1ULL << 44)
result += (unsignedlonglong)mult * (unsignedlonglong)mult * (unsignedlonglong)jiffies_to_msecs(j >> 44);
a = dm_shift_arg(&as); if (!strcmp(a, "-")) {
start = 0;
len = dm_get_size(md); if (!len)
len = 1;
} elseif (sscanf(a, "%llu+%llu%c", &start, &len, &dummy) != 2 ||
start != (sector_t)start || len != (sector_t)len) goto ret_einval;
end = start + len; if (start >= end) goto ret_einval;
a = dm_shift_arg(&as); if (sscanf(a, "/%u%c", &divisor, &dummy) == 1) { if (!divisor) return -EINVAL;
step = end - start; if (do_div(step, divisor))
step++; if (!step)
step = 1;
} elseif (sscanf(a, "%llu%c", &step, &dummy) != 1 ||
step != (sector_t)step || !step) goto ret_einval;
as_backup = as;
a = dm_shift_arg(&as); if (a && sscanf(a, "%u%c", &feature_args, &dummy) == 1) { while (feature_args--) {
a = dm_shift_arg(&as); if (!a) goto ret_einval; if (!strcasecmp(a, "precise_timestamps"))
stat_flags |= STAT_PRECISE_TIMESTAMPS; elseif (!strncasecmp(a, "histogram:", 10)) { if (n_histogram_entries) goto ret_einval;
r = parse_histogram(a + 10, &n_histogram_entries, &histogram_boundaries); if (r) goto ret;
} else goto ret_einval;
}
} else {
as = as_backup;
}
program_id = "-";
aux_data = "-";
a = dm_shift_arg(&as); if (a)
program_id = a;
a = dm_shift_arg(&as); if (a)
aux_data = a;
if (as.argc) goto ret_einval;
/* * If a buffer overflow happens after we created the region, * it's too late (the userspace would retry with a larger * buffer, but the region id that caused the overflow is already * leaked). So we must detect buffer overflow in advance.
*/
snprintf(result, maxlen, "%d", INT_MAX); if (dm_message_test_buffer_overflow(result, maxlen)) {
r = 1; goto ret;
}
id = dm_stats_create(dm_get_stats(md), start, end, step, stat_flags,
n_histogram_entries, histogram_boundaries, program_id, aux_data,
dm_internal_suspend_fast, dm_internal_resume_fast, md); if (id < 0) {
r = id; goto ret;
}
snprintf(result, maxlen, "%d", id);
r = 1; goto ret;
ret_einval:
r = -EINVAL;
ret:
kfree(histogram_boundaries); return r;
}
/*
 * Module-exit teardown for the dm statistics subsystem.
 *
 * If any per-CPU statistics memory was ever freed via call_rcu, wait
 * for all outstanding RCU callbacks to run before the module text goes
 * away (otherwise a pending callback could execute unloaded code).
 *
 * shared_memory_amount tracks every byte the statistics code has
 * allocated; it must be zero here — a non-zero value means some region
 * leaked, which we loudly report.
 */
void dm_statistics_exit(void)
{
	if (dm_stat_need_rcu_barrier)
		rcu_barrier();
	if (WARN_ON(shared_memory_amount))
		DMCRIT("shared_memory_amount leaked: %lu", shared_memory_amount);
}
/*
 * Expose the current statistics memory usage as a read-only (0444)
 * module parameter, visible in sysfs as
 * /sys/module/<module>/parameters/stats_current_allocated_bytes.
 */
module_param_named(stats_current_allocated_bytes, shared_memory_amount, ulong, 0444);
MODULE_PARM_DESC(stats_current_allocated_bytes, "Memory currently used by statistics");
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.