Commit 6e6a8b24 authored by Dan Williams, committed by Greg Kroah-Hartman

mm, devm_memremap_pages: fix shutdown handling

commit a95c90f1 upstream.

The last step before devm_memremap_pages() returns success is to allocate
a release action, devm_memremap_pages_release(), to tear the entire setup
down.  However, the result from devm_add_action() is not checked.

Checking the error from devm_add_action() is not enough.  The API
currently relies on the fact that the percpu_ref it is using is killed by
the time the devm_memremap_pages_release() is run.  Rather than continue
this awkward situation, offload the responsibility of killing the
percpu_ref to devm_memremap_pages_release() directly.  This allows
devm_memremap_pages() to do the right thing relative to init failures and
shutdown.

Without this change we could fail to register the teardown of
devm_memremap_pages().  The likelihood of hitting this failure is tiny as
small memory allocations almost always succeed.  However, the impact of
the failure is large given any future reconfiguration, or disable/enable,
of an nvdimm namespace will fail forever as subsequent calls to
devm_memremap_pages() will fail to set up the pgmap_radix since there will
be stale entries for the physical address range.

An argument could be made to require that the ->kill() operation be set in
the @pgmap arg rather than passed in separately.  However, it helps code
readability, tracking the lifetime of a given instance, to be able to grep
the kill routine directly at the devm_memremap_pages() call site.

Link: Dan Williams <>
Fixes: e8d51348 ("memremap: change devm_memremap_pages interface...")
Reviewed-by: "Jérôme Glisse" <>
Reported-by: Logan Gunthorpe <>
Reviewed-by: Logan Gunthorpe <>
Reviewed-by: Christoph Hellwig <>
Cc: Balbir Singh <>
Cc: Michal Hocko <>
Cc: <>
Signed-off-by: Andrew Morton <>
Signed-off-by: Linus Torvalds <>
Signed-off-by: Greg Kroah-Hartman <>
parent 13ab61ae
......@@ -48,9 +48,8 @@ static void dax_pmem_percpu_exit(void *data)
static void dax_pmem_percpu_kill(void *data)
static void dax_pmem_percpu_kill(struct percpu_ref *ref)
struct percpu_ref *ref = data;
struct dax_pmem *dax_pmem = to_dax_pmem(ref);
dev_dbg(dax_pmem->dev, "trace\n");
......@@ -112,17 +111,10 @@ static int dax_pmem_probe(struct device *dev)
dax_pmem->pgmap.ref = &dax_pmem->ref;
dax_pmem->pgmap.kill = dax_pmem_percpu_kill;
addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
if (IS_ERR(addr)) {
devm_remove_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
if (IS_ERR(addr))
return PTR_ERR(addr);
rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
if (rc)
return rc;
/* adjust the dax_region resource to the start of data */
memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
......@@ -309,8 +309,11 @@ static void pmem_release_queue(void *q)
static void pmem_freeze_queue(void *q)
static void pmem_freeze_queue(struct percpu_ref *ref)
struct request_queue *q;
q = container_of(ref, typeof(*q), q_usage_counter);
......@@ -402,6 +405,7 @@ static int pmem_attach_disk(struct device *dev,
pmem->pfn_flags = PFN_DEV;
pmem->pgmap.ref = &q->q_usage_counter;
pmem->pgmap.kill = pmem_freeze_queue;
if (is_nd_pfn(dev)) {
if (setup_pagemap_fsdax(dev, &pmem->pgmap))
return -ENOMEM;
......@@ -427,13 +431,6 @@ static int pmem_attach_disk(struct device *dev,
memcpy(&bb_res, &nsio->res, sizeof(bb_res));
* At release time the queue must be frozen before
* devm_memremap_pages is unwound
if (devm_add_action_or_reset(dev, pmem_freeze_queue, q))
return -ENOMEM;
if (IS_ERR(addr))
return PTR_ERR(addr);
pmem->virt_addr = addr;
......@@ -111,6 +111,7 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
* @altmap: pre-allocated/reserved memory for vmemmap allocations
* @res: physical address range covered by @ref
* @ref: reference count that pins the devm_memremap_pages() mapping
* @kill: callback to transition @ref to the dead state
* @dev: host device of the mapping for debug
* @data: private data pointer for page_free()
* @type: memory type: see MEMORY_* in memory_hotplug.h
......@@ -122,6 +123,7 @@ struct dev_pagemap {
bool altmap_valid;
struct resource res;
struct percpu_ref *ref;
void (*kill)(struct percpu_ref *ref);
struct device *dev;
void *data;
enum memory_type type;
......@@ -88,14 +88,10 @@ static void devm_memremap_pages_release(void *data)
resource_size_t align_start, align_size;
unsigned long pfn;
for_each_device_pfn(pfn, pgmap)
if (percpu_ref_tryget_live(pgmap->ref)) {
dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
/* pages are dead and unused, undo the arch mapping */
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
......@@ -116,7 +112,7 @@ static void devm_memremap_pages_release(void *data)
* devm_memremap_pages - remap and provide memmap backing for the given resource
* @dev: hosting device for @res
* @pgmap: pointer to a struct dev_pgmap
* @pgmap: pointer to a struct dev_pagemap
* Notes:
* 1/ At a minimum the res, ref and type members of @pgmap must be initialized
......@@ -125,11 +121,8 @@ static void devm_memremap_pages_release(void *data)
* 2/ The altmap field may optionally be initialized, in which case altmap_valid
* must be set to true
* 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
* time (or devm release event). The expected order of events is that ref has
* been through percpu_ref_kill() before devm_memremap_pages_release(). The
* wait for the completion of all references being dropped and
* percpu_ref_exit() must occur after devm_memremap_pages_release().
* 3/ pgmap->ref must be 'live' on entry and will be killed at
* devm_memremap_pages_release() time, or if this routine fails.
* 4/ res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but
......@@ -145,6 +138,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
pgprot_t pgprot = PAGE_KERNEL;
int error, nid, is_ram;
if (!pgmap->ref || !pgmap->kill)
return ERR_PTR(-EINVAL);
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
- align_start;
......@@ -170,12 +166,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (is_ram != REGION_DISJOINT) {
WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
is_ram == REGION_MIXED ? "mixed" : "ram", res);
return ERR_PTR(-ENXIO);
error = -ENXIO;
goto err_array;
if (!pgmap->ref)
return ERR_PTR(-EINVAL);
pgmap->dev = dev;
error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
......@@ -217,7 +211,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
align_size >> PAGE_SHIFT, pgmap);
percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
devm_add_action(dev, devm_memremap_pages_release, pgmap);
error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
if (error)
return ERR_PTR(error);
return __va(res->start);
......@@ -228,6 +225,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
return ERR_PTR(error);
......@@ -104,13 +104,26 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
static void nfit_test_kill(void *_pgmap)
struct dev_pagemap *pgmap = _pgmap;
void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
resource_size_t offset = pgmap->res.start;
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
if (nfit_res) {
int rc;
rc = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
if (rc)
return ERR_PTR(rc);
return nfit_res->buf + offset - nfit_res->res.start;
return devm_memremap_pages(dev, pgmap);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment