Commit 8c2f7e86 authored by Dan Williams

libnvdimm: infrastructure for btt devices

NVDIMM namespaces, in addition to accepting "struct bio" based requests,
also have the capability to perform byte-aligned accesses.  By default
only the bio/block interface is used.  However, if another driver can
make effective use of the byte-aligned capability, it can claim the
namespace interface and use the byte-aligned ->rw_bytes() interface.

The BTT driver is the first consumer of this mechanism to allow
adding atomic sector update semantics to a pmem or blk namespace.  This
patch is the sysfs infrastructure to allow configuring a BTT instance
for a namespace.  Enabling that BTT and performing i/o is in a
subsequent patch.

Cc: Greg KH <[email protected]>
Cc: Neil Brown <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
parent 0ba1c634
......@@ -33,4 +33,7 @@ config BLK_DEV_PMEM
Say Y if you want to use an NVDIMM
config BTT
def_bool y
endif
......@@ -11,3 +11,4 @@ libnvdimm-y += region_devs.o
libnvdimm-y += region.o
libnvdimm-y += namespace_devs.o
libnvdimm-y += label.o
libnvdimm-$(CONFIG_BTT) += btt_devs.o
/*
* Block Translation Table library
* Copyright (c) 2014-2015, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _LINUX_BTT_H
#define _LINUX_BTT_H
#include <linux/types.h>
#define BTT_SIG_LEN 16
#define BTT_SIG "BTT_ARENA_INFO\0"
/*
 * struct btt_sb - BTT info block ("superblock")
 *
 * nd_btt_probe() reads one of these from byte offset SZ_4K of a
 * namespace via nvdimm_read_bytes().  Multi-byte fields are declared
 * little-endian (__le16/__le32/__le64) and must be converted with the
 * le*_to_cpu() / cpu_to_le*() helpers before use.
 *
 * The declared field sizes add up to exactly 4096 bytes:
 * 120 bytes of header fields + 3968 bytes of padding + 8 bytes of
 * checksum.
 *
 * NOTE(review): the meaning of the lbasize/nlba/nfree/infosize and
 * *off fields is not established by the code in this patch; see the
 * BTT layout documentation before relying on them.
 */
struct btt_sb {
/* compared against BTT_SIG (BTT_SIG_LEN bytes) when probing */
u8 signature[BTT_SIG_LEN];
/* 16-byte uuid; copied into nd_btt->uuid on a successful probe */
u8 uuid[16];
u8 parent_uuid[16];
__le32 flags;
__le16 version_major;
__le16 version_minor;
/* becomes nd_btt->lbasize on a successful probe */
__le32 external_lbasize;
__le32 external_nlba;
__le32 internal_lbasize;
__le32 internal_nlba;
__le32 nfree;
__le32 infosize;
__le64 nextoff;
__le64 dataoff;
__le64 mapoff;
__le64 logoff;
__le64 info2off;
/* pads the structure so that checksum occupies the final 8 bytes */
u8 padding[3968];
/* fletcher64 of the block, see nd_btt_sb_checksum() */
__le64 checksum;
};
#endif
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/blkdev.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include "nd-core.h"
#include "btt.h"
#include "nd.h"
/*
 * __nd_btt_detach_ndns - drop the btt's claim on its namespace
 * @nd_btt: btt instance that currently holds a namespace
 *
 * Clears both directions of the claim (namespace -> btt and
 * btt -> namespace) and releases the namespace device reference that
 * was taken when the claim was established.  Warns once if the
 * namespace device mutex is not held or if the namespace is not
 * actually claimed by this btt.  @nd_btt->ndns must be non-NULL.
 */
static void __nd_btt_detach_ndns(struct nd_btt *nd_btt)
{
	struct nd_namespace_common *claimed = nd_btt->ndns;
	struct device *btt_dev = &nd_btt->dev;

	dev_WARN_ONCE(btt_dev, !mutex_is_locked(&claimed->dev.mutex)
			|| claimed->claim != btt_dev,
			"%s: invalid claim\n", __func__);

	nd_btt->ndns = NULL;
	claimed->claim = NULL;
	put_device(&claimed->dev);
}
/*
 * nd_btt_detach_ndns - locked wrapper around __nd_btt_detach_ndns()
 * @nd_btt: btt instance, which may or may not hold a namespace
 *
 * No-op when the btt has no namespace.  Otherwise the namespace device
 * lock is taken around the detach.  A temporary reference pins the
 * namespace device so it is still valid for device_unlock() after the
 * detach has dropped the claim's own reference.
 */
static void nd_btt_detach_ndns(struct nd_btt *nd_btt)
{
	struct nd_namespace_common *ndns = nd_btt->ndns;
	struct device *ns_dev;

	if (!ndns)
		return;

	ns_dev = &ndns->dev;
	get_device(ns_dev);
	device_lock(ns_dev);
	__nd_btt_detach_ndns(nd_btt);
	device_unlock(ns_dev);
	put_device(ns_dev);
}
static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt,
struct nd_namespace_common *ndns)
{
if (ndns->claim)
return false;
dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
|| nd_btt->ndns,
"%s: invalid claim\n", __func__);
ndns->claim = &nd_btt->dev;
nd_btt->ndns = ndns;
get_device(&ndns->dev);
return true;
}
/*
 * nd_btt_attach_ndns - locked wrapper around __nd_btt_attach_ndns()
 * @nd_btt: btt instance that wants the namespace
 * @ndns: namespace to claim
 *
 * Takes the namespace device lock for the duration of the claim
 * attempt.  Returns true if the claim was established.
 */
static bool nd_btt_attach_ndns(struct nd_btt *nd_btt,
		struct nd_namespace_common *ndns)
{
	struct device *ns_dev = &ndns->dev;
	bool attached;

	device_lock(ns_dev);
	attached = __nd_btt_attach_ndns(nd_btt, ndns);
	device_unlock(ns_dev);

	return attached;
}
/*
 * nd_btt_release - final teardown of a btt device
 * @dev: the btt device whose last reference was dropped
 *
 * Installed as the release callback of nd_btt_device_type.  Drops any
 * remaining namespace claim, returns the instance id to the parent
 * region's allocator and frees the uuid and the btt object itself.
 */
static void nd_btt_release(struct device *dev)
{
	struct nd_btt *btt = to_nd_btt(dev);
	struct nd_region *region = to_nd_region(dev->parent);

	dev_dbg(dev, "%s\n", __func__);

	nd_btt_detach_ndns(btt);
	ida_simple_remove(&region->btt_ida, btt->id);
	kfree(btt->uuid);
	kfree(btt);
}
/*
 * Device type assigned to every btt device by __nd_btt_create().
 * is_nd_btt() recognizes btt devices by comparing against its address,
 * and nd_btt_release() runs when the last device reference is dropped.
 */
static struct device_type nd_btt_device_type = {
.name = "nd_btt",
.release = nd_btt_release,
};
/*
 * is_nd_btt - test whether a device is a btt device
 * @dev: device to test
 *
 * Returns true when @dev carries the btt device type.
 */
bool is_nd_btt(struct device *dev)
{
	const struct device_type *type = dev->type;

	return type == &nd_btt_device_type;
}
EXPORT_SYMBOL(is_nd_btt);
/*
 * to_nd_btt - convert an embedded struct device to its struct nd_btt
 * @dev: device embedded in a struct nd_btt
 *
 * Warns if @dev is not a btt device, but performs the conversion
 * regardless.
 */
struct nd_btt *to_nd_btt(struct device *dev)
{
	WARN_ON(!is_nd_btt(dev));

	return container_of(dev, struct nd_btt, dev);
}
EXPORT_SYMBOL(to_nd_btt);
/*
 * Sector sizes offered for a btt device, passed to
 * nd_sector_size_show() and nd_sector_size_store().
 * NOTE(review): the trailing 0 presumably terminates the list for
 * those helpers - confirm against their implementation.
 */
static const unsigned long btt_lbasize_supported[] = { 512, 4096, 0 };
static ssize_t sector_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nd_btt *nd_btt = to_nd_btt(dev);
return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf);
}
/*
 * sector_size_store - sysfs write handler for the btt "sector_size" attribute
 *
 * Holds the device lock and then the nvdimm bus lock while
 * nd_sector_size_store() parses @buf and updates the btt's lbasize.
 * Returns @len when the helper reports 0, otherwise the helper's
 * result unchanged.
 */
static ssize_t sector_size_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_btt *btt = to_nd_btt(dev);
	ssize_t rc;

	device_lock(dev);
	nvdimm_bus_lock(dev);

	rc = nd_sector_size_store(dev, buf, &btt->lbasize,
			btt_lbasize_supported);
	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
			rc, buf, buf[len - 1] == '\n' ? "" : "\n");

	nvdimm_bus_unlock(dev);
	device_unlock(dev);

	if (rc)
		return rc;
	return len;
}
static DEVICE_ATTR_RW(sector_size);
/*
 * uuid_show - sysfs read handler for the btt "uuid" attribute
 *
 * Prints the uuid using the %pUb format, or an empty line when no uuid
 * has been set.
 */
static ssize_t uuid_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_btt *btt = to_nd_btt(dev);

	if (!btt->uuid)
		return sprintf(buf, "\n");

	return sprintf(buf, "%pUb\n", btt->uuid);
}
/*
 * uuid_store - sysfs write handler for the btt "uuid" attribute
 *
 * Holds the device lock while nd_uuid_store() parses @buf and updates
 * the btt's uuid pointer.  Returns @len when the helper reports 0,
 * otherwise the helper's result unchanged.
 */
static ssize_t uuid_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_btt *btt = to_nd_btt(dev);
	ssize_t rc;

	device_lock(dev);

	rc = nd_uuid_store(dev, &btt->uuid, buf, len);
	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
			rc, buf, buf[len - 1] == '\n' ? "" : "\n");

	device_unlock(dev);

	if (rc)
		return rc;
	return len;
}
static DEVICE_ATTR_RW(uuid);
/*
 * namespace_show - sysfs read handler for the btt "namespace" attribute
 *
 * Under the nvdimm bus lock, prints the device name of the claimed
 * namespace, or an empty line when no namespace is attached.
 */
static ssize_t namespace_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_btt *btt = to_nd_btt(dev);
	ssize_t count;

	nvdimm_bus_lock(dev);
	if (btt->ndns)
		count = sprintf(buf, "%s\n", dev_name(&btt->ndns->dev));
	else
		count = sprintf(buf, "%s\n", "");
	nvdimm_bus_unlock(dev);

	return count;
}
/*
 * namespace_match - device_find_child() callback matching on device name
 * @dev: candidate child device
 * @data: NUL-terminated name to look for
 *
 * Returns 1 when the name of @dev equals @data exactly, 0 otherwise.
 */
static int namespace_match(struct device *dev, void *data)
{
	const char *wanted = data;

	return !strcmp(wanted, dev_name(dev));
}
/*
 * is_nd_btt_idle - decide whether a btt device is unused
 * @dev: btt device to inspect
 *
 * A btt is idle only when it is not the parent region's seed device,
 * holds no namespace and has no driver bound.
 */
static bool is_nd_btt_idle(struct device *dev)
{
	struct nd_region *region = to_nd_region(dev->parent);
	struct nd_btt *btt = to_nd_btt(dev);

	return region->btt_seed != dev && !btt->ndns && !dev->driver;
}
/*
 * __namespace_store - attach a namespace to, or detach it from, a btt device
 * @dev: btt device being configured
 * @attr: not used by this function
 * @buf: user input: a device name starting with "namespace", or an
 *       empty string (after trimming whitespace) to detach
 * @len: number of bytes in @buf
 *
 * Returns @len on success.  Failures are reported as a negative errno:
 * -EBUSY (driver bound, a namespace is already set, or the target
 * namespace is already claimed), -ENOMEM, -EINVAL (unrecognized input),
 * -ENODEV (no sibling device with that name) or -ENXIO (namespace
 * smaller than SZ_16M).
 *
 * The only caller in this file, namespace_store(), invokes this with
 * both the nvdimm bus lock and the device lock of @dev held.
 */
static ssize_t __namespace_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct nd_btt *nd_btt = to_nd_btt(dev);
struct nd_namespace_common *ndns;
struct device *found;
char *name;
/* the configuration cannot change while a driver is bound */
if (dev->driver) {
dev_dbg(dev, "%s: -EBUSY\n", __func__);
return -EBUSY;
}
/* work on a private, NUL-terminated and trimmed copy of the input */
name = kstrndup(buf, len, GFP_KERNEL);
if (!name)
return -ENOMEM;
strim(name);
if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0)
/* pass */;
else {
/* len is size_t; the negative errno survives the ssize_t return */
len = -EINVAL;
goto out;
}
ndns = nd_btt->ndns;
if (strcmp(name, "") == 0) {
/* detach the namespace and destroy / reset the btt device */
nd_btt_detach_ndns(nd_btt);
if (is_nd_btt_idle(dev))
nd_device_unregister(dev, ND_ASYNC);
else {
nd_btt->lbasize = 0;
kfree(nd_btt->uuid);
nd_btt->uuid = NULL;
}
goto out;
} else if (ndns) {
/* an existing claim must be released before setting a new one */
dev_dbg(dev, "namespace already set to: %s\n",
dev_name(&ndns->dev));
len = -EBUSY;
goto out;
}
/* look the namespace up by name among the siblings of this btt */
found = device_find_child(dev->parent, name, namespace_match);
if (!found) {
dev_dbg(dev, "'%s' not found under %s\n", name,
dev_name(dev->parent));
len = -ENODEV;
goto out;
}
ndns = to_ndns(found);
/* unlocked capacity variant: the bus lock is expected to be held */
if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
dev_dbg(dev, "%s too small to host btt\n", name);
len = -ENXIO;
goto out_attach;
}
WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
if (!nd_btt_attach_ndns(nd_btt, ndns)) {
dev_dbg(dev, "%s already claimed\n",
dev_name(&ndns->dev));
len = -EBUSY;
}
out_attach:
/* a successful attach holds its own reference on the namespace */
put_device(&ndns->dev); /* from device_find_child */
out:
kfree(name);
return len;
}
/*
 * namespace_store - sysfs write handler for the btt "namespace" attribute
 * @dev: btt device being configured
 * @attr: attribute being written (passed through)
 * @buf: user input, see __namespace_store()
 * @len: number of bytes in @buf
 *
 * Returns the result of __namespace_store(): @len on success or a
 * negative errno.
 *
 * Lock order: the device lock is taken before the nvdimm bus lock,
 * the same order sector_size_store() uses on this device.  Taking the
 * two locks in opposite orders in different attribute handlers allows
 * an ABBA deadlock between concurrent writers to "namespace" and
 * "sector_size".
 */
static ssize_t namespace_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	ssize_t rc;

	device_lock(dev);
	nvdimm_bus_lock(dev);

	rc = __namespace_store(dev, attr, buf, len);
	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
			rc, buf, buf[len - 1] == '\n' ? "" : "\n");

	nvdimm_bus_unlock(dev);
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RW(namespace);
/* sysfs attributes specific to btt devices (NULL-terminated) */
static struct attribute *nd_btt_attributes[] = {
&dev_attr_sector_size.attr,
&dev_attr_namespace.attr,
&dev_attr_uuid.attr,
NULL,
};
static struct attribute_group nd_btt_attribute_group = {
.attrs = nd_btt_attributes,
};
/*
 * Attribute groups installed on every btt device by __nd_btt_create():
 * the btt-specific group above plus nd_device_attribute_group.
 */
static const struct attribute_group *nd_btt_attribute_groups[] = {
&nd_btt_attribute_group,
&nd_device_attribute_group,
NULL,
};
/*
 * __nd_btt_create - allocate and initialize (but do not register) a btt device
 * @nd_region: parent region; provides the instance id allocator and
 *             becomes the parent of the new device
 * @lbasize: initial sector size recorded in the new device
 * @uuid: optional 16-byte uuid to copy into the new device, or NULL
 * @ndns: optional namespace to claim for the new device, or NULL
 *
 * Returns the initialized device, or NULL when an allocation fails or
 * @ndns is already claimed by another device.  A failure to duplicate
 * @uuid is treated as an allocation failure rather than silently
 * creating a device without the requested uuid.
 *
 * When @ndns is given, __nd_btt_attach_ndns() expects the namespace
 * device mutex to be held by the caller.
 */
static struct device *__nd_btt_create(struct nd_region *nd_region,
		unsigned long lbasize, u8 *uuid,
		struct nd_namespace_common *ndns)
{
	struct nd_btt *nd_btt;
	struct device *dev;

	nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL);
	if (!nd_btt)
		return NULL;

	nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL);
	if (nd_btt->id < 0)
		goto out_free;

	nd_btt->lbasize = lbasize;
	if (uuid) {
		uuid = kmemdup(uuid, 16, GFP_KERNEL);
		if (!uuid)
			goto out_put_id;
	}
	nd_btt->uuid = uuid;

	dev = &nd_btt->dev;
	dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id);
	dev->parent = &nd_region->dev;
	dev->type = &nd_btt_device_type;
	dev->groups = nd_btt_attribute_groups;
	device_initialize(&nd_btt->dev);

	if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) {
		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
				__func__, dev_name(ndns->claim));
		/* the device is initialized: nd_btt_release() cleans up */
		put_device(dev);
		return NULL;
	}

	return dev;

out_put_id:
	ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
out_free:
	kfree(nd_btt);
	return NULL;
}
/*
 * nd_btt_create - create and register an unconfigured btt device
 * @nd_region: parent region for the new device
 *
 * The device starts with lbasize 0, no uuid and no namespace.  Returns
 * the device after handing it to __nd_device_register(), or NULL if
 * creation failed.
 */
struct device *nd_btt_create(struct nd_region *nd_region)
{
	struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);

	if (!dev)
		return NULL;

	__nd_device_register(dev);
	return dev;
}
/*
 * nd_btt_sb_checksum - compute the checksum of a btt info block
 * @btt_sb: info block to checksum
 *
 * Runs nd_fletcher64() over the entire info block with the checksum
 * field temporarily set to zero, then restores the field to the value
 * it had on entry.  Returns the computed sum.
 */
u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
{
	u64 stored = btt_sb->checksum;
	u64 computed;

	btt_sb->checksum = 0;
	computed = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
	btt_sb->checksum = stored;

	return computed;
}
EXPORT_SYMBOL(nd_btt_sb_checksum);
/*
 * __nd_btt_probe - validate an on-namespace btt info block and register
 * @nd_btt: pre-created btt device that has claimed @ndns
 * @ndns: namespace to read the info block from
 * @btt_sb: caller-provided buffer receiving the info block
 *
 * Reads the info block at byte offset SZ_4K, then requires a namespace
 * capacity of at least SZ_16M, a matching signature and a matching
 * checksum.  On success the btt inherits lbasize and uuid from the
 * info block and is handed to __nd_device_register().
 *
 * Returns 0 on success, -ENODEV for a missing argument or an info
 * block that does not validate, -ENXIO for a read failure or a
 * too-small namespace, and -ENOMEM if the uuid cannot be duplicated.
 */
static int __nd_btt_probe(struct nd_btt *nd_btt,
		struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
{
	u64 expected;

	if (!(btt_sb && ndns && nd_btt))
		return -ENODEV;

	if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb)) != 0)
		return -ENXIO;

	if (nvdimm_namespace_capacity(ndns) < SZ_16M)
		return -ENXIO;

	if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN))
		return -ENODEV;

	/* verify with the checksum field cleared, then put it back */
	expected = le64_to_cpu(btt_sb->checksum);
	btt_sb->checksum = 0;
	if (nd_btt_sb_checksum(btt_sb) != expected)
		return -ENODEV;
	btt_sb->checksum = cpu_to_le64(expected);

	nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
	nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
	if (!nd_btt->uuid)
		return -ENOMEM;

	__nd_device_register(&nd_btt->dev);

	return 0;
}
/*
 * nd_btt_probe - look for a btt info block on a namespace
 * @ndns: namespace to probe
 * @drvdata: stored as driver data on the btt device that is created
 *
 * Skipped with -ENODEV when the namespace has force_raw set.  Otherwise
 * a btt device claiming @ndns is created under the nvdimm bus lock and
 * __nd_btt_probe() decides whether the namespace really carries a btt.
 * If it does not, the claim is dropped and the device reference is
 * released again.
 *
 * Returns 0 when a btt was found and registered, -ENOMEM when the btt
 * device could not be created, or the error from __nd_btt_probe().
 */
int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
{
	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
	struct btt_sb *btt_sb;
	struct device *dev;
	int rc;

	if (ndns->force_raw)
		return -ENODEV;

	nvdimm_bus_lock(&ndns->dev);
	dev = __nd_btt_create(nd_region, 0, NULL, ndns);
	nvdimm_bus_unlock(&ndns->dev);
	if (!dev)
		return -ENOMEM;

	dev_set_drvdata(dev, drvdata);

	/* a failed allocation is rejected inside __nd_btt_probe() */
	btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL);
	rc = __nd_btt_probe(to_nd_btt(dev), ndns, btt_sb);
	kfree(btt_sb);

	dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__,
			rc == 0 ? dev_name(dev) : "<none>");

	if (rc >= 0)
		return rc;

	/* no usable btt: undo the claim and drop the device */
	__nd_btt_detach_ndns(to_nd_btt(dev));
	put_device(dev);

	return rc;
}
EXPORT_SYMBOL(nd_btt_probe);
......@@ -14,8 +14,10 @@
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/fcntl.h>
#include <linux/async.h>
#include <linux/genhd.h>
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/slab.h>
......@@ -103,6 +105,7 @@ static int nvdimm_bus_probe(struct device *dev)
dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
dev_name(dev), rc);
if (rc != 0)
module_put(provider);
return rc;
......@@ -163,14 +166,19 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
put_device(dev);
}
void nd_device_register(struct device *dev)
void __nd_device_register(struct device *dev)
{
dev->bus = &nvdimm_bus_type;
device_initialize(dev);
get_device(dev);
async_schedule_domain(nd_async_device_register, dev,
&nd_async_domain);
}
void nd_device_register(struct device *dev)
{
device_initialize(dev);
__nd_device_register(dev);
}
EXPORT_SYMBOL(nd_device_register);
void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
......
......@@ -666,7 +666,7 @@ static int __blk_label_update(struct nd_region *nd_region,
/* don't allow updates that consume the last label */
if (nfree - alloc < 0 || nfree - alloc + victims < 1) {
dev_info(&nsblk->dev, "insufficient label space\n");
dev_info(&nsblk->common.dev, "insufficient label space\n");
kfree(victim_map);
return -ENOSPC;
}
......@@ -762,7 +762,8 @@ static int __blk_label_update(struct nd_region *nd_region,
continue;
res = to_resource(ndd, nd_label);
res->flags &= ~DPA_RESOURCE_ADJUSTED;
dev_vdbg(&nsblk->dev, "assign label[%d] slot: %d\n", l, slot);
dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n",
l, slot);
nd_mapping->labels[l++] = nd_label;
}
nd_mapping->labels[l] = NULL;
......
......@@ -102,7 +102,7 @@ static ssize_t __alt_name_store(struct device *dev, const char *buf,
} else
return -ENXIO;
if (dev->driver)
if (dev->driver || to_ndns(dev)->claim)
return -EBUSY;
input = kmemdup(buf, len + 1, GFP_KERNEL);
......@@ -133,7 +133,7 @@ static ssize_t __alt_name_store(struct device *dev, const char *buf,
static resource_size_t nd_namespace_blk_size(struct nd_namespace_blk *nsblk)
{
struct nd_region *nd_region = to_nd_region(nsblk->dev.parent);
struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent);
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_label_id label_id;
......@@ -152,9 +152,9 @@ static resource_size_t nd_namespace_blk_size(struct nd_namespace_blk *nsblk)
static int nd_namespace_label_update(struct nd_region *nd_region,
struct device *dev)
{
dev_WARN_ONCE(dev, dev->driver,
dev_WARN_ONCE(dev, dev->driver || to_ndns(dev)->claim,
"namespace must be idle during label update\n");
if (dev->driver)
if (dev->driver || to_ndns(dev)->claim)
return 0;
/*
......@@ -666,7 +666,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
u8 *uuid = NULL;
int rc, i;
if (dev->driver)
if (dev->driver || to_ndns(dev)->claim)
return -EBUSY;
if (is_namespace_pmem(dev)) {
......@@ -733,12 +733,16 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
nd_namespace_pmem_set_size(nd_region, nspm,
val * nd_region->ndr_mappings);
} else if (is_namespace_blk(dev)) {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
/*
* Try to delete the namespace if we deleted all of its
* allocation and this is not the seed device for the
* region.
* allocation, this is not the seed device for the
* region, and it is not actively claimed by a btt
* instance.
*/
if (val == 0 && nd_region->ns_seed != dev)
if (val == 0 && nd_region->ns_seed != dev
&& !nsblk->common.claim)
nd_device_unregister(dev, ND_ASYNC);
}
......@@ -789,26 +793,42 @@ static ssize_t size_store(struct device *dev,
return rc < 0 ? rc : len;
}
static ssize_t size_show(struct device *dev,
struct device_attribute *attr, char *buf)
resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns)
{
unsigned long long size = 0;
struct device *dev = &ndns->dev;
nvdimm_bus_lock(dev);
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
size = resource_size(&nspm->nsio.res);
return resource_size(&nspm->nsio.res);
} else if (is_namespace_blk(dev)) {
size = nd_namespace_blk_size(to_nd_namespace_blk(dev));
return nd_namespace_blk_size(to_nd_namespace_blk(dev));
} else if (is_namespace_io(dev)) {
struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
size = resource_size(&nsio->res);
}
nvdimm_bus_unlock(dev);
return resource_size(&nsio->res);
} else
WARN_ONCE(1, "unknown namespace type\n");
return 0;
}
resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns)
{
resource_size_t size;
return sprintf(buf, "%llu\n", size);
nvdimm_bus_lock(&ndns->dev);
size = __nvdimm_namespace_capacity(ndns);
nvdimm_bus_unlock(&ndns->dev);
return size;
}
EXPORT_SYMBOL(nvdimm_namespace_capacity);
/*
 * size_show - sysfs read handler for the namespace "size" attribute
 *
 * Prints the capacity reported by nvdimm_namespace_capacity() as an
 * unsigned decimal number followed by a newline.
 */
static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	unsigned long long bytes;

	bytes = (unsigned long long) nvdimm_namespace_capacity(to_ndns(dev));

	return sprintf(buf, "%llu\n", bytes);
}
static DEVICE_ATTR(size, S_IRUGO, size_show, size_store);
......@@ -897,8 +917,8 @@ static ssize_t uuid_store(struct device *dev,
{
struct nd_region *nd_region = to_nd_region(dev->parent);
u8 *uuid = NULL;
ssize_t rc = 0;
u8 **ns_uuid;
ssize_t rc;
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
......@@ -914,7 +934,10 @@ static ssize_t uuid_store(struct device *dev,
device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = nd_uuid_store(dev, &uuid, buf, len);
if (to_ndns(dev)->claim)
rc = -EBUSY;
if (rc >= 0)
rc = nd_uuid_store(dev, &uuid, buf, len);
if (rc >= 0)
rc = namespace_update_uuid(nd_region, dev, uuid, ns_uuid);
if (rc >= 0)
......@@ -971,15 +994,18 @@ static ssize_t sector_size_store(struct device *dev,
{
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
ssize_t rc;
ssize_t rc = 0;
if (!is_namespace_blk(dev))
return -ENXIO;
device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_sector_size_store(dev, buf, &nsblk->lbasize,
ns_lbasize_supported);
if (to_ndns(dev)->claim)
rc = -EBUSY;
if (rc >= 0)
rc = nd_sector_size_store(dev, buf, &nsblk->lbasize,
ns_lbasize_supported);
if (rc >= 0)
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
......@@ -1034,12 +1060,48 @@ static ssize_t dpa_extents_show(struct device *dev,
}
static DEVICE_ATTR_RO(dpa_extents);
/*
 * holder_show - sysfs read handler for the namespace "holder" attribute
 *
 * Under the device lock, prints the name of the device that has
 * claimed this namespace, or an empty line when it is unclaimed.
 */
static ssize_t holder_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_namespace_common *ndns = to_ndns(dev);
	const char *holder = "";
	ssize_t count;

	device_lock(dev);
	if (ndns->claim)
		holder = dev_name(ndns->claim);
	count = sprintf(buf, "%s\n", holder);
	device_unlock(dev);

	return count;
}
static DEVICE_ATTR_RO(holder);
/*
 * force_raw_store - sysfs write handler for the namespace "force_raw" attribute
 *
 * Parses @buf with strtobool() and stores the result in the
 * namespace's force_raw flag.  Returns @len on success or the
 * strtobool() error.
 */
static ssize_t force_raw_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	bool enable;
	int err;

	err = strtobool(buf, &enable);
	if (err)
		return err;

	to_ndns(dev)->force_raw = enable;

	return len;
}
static ssize_t force_raw_show(struct device *dev,