Commit 5536b3a5 authored by Milan Broz's avatar Milan Broz

Add implementation of device/file locking for metadata.

To be used later.
parent a8347d28
......@@ -5,7 +5,8 @@ SUBDIRS = \
man \
python \
tests \
po
po \
scripts
ACLOCAL_AMFLAGS = -I m4
......
......@@ -494,6 +494,17 @@ CS_NUM_WITH([verity-hash-block], [hash block size for verity mode], [4096])
CS_NUM_WITH([verity-salt-size], [salt size for verity mode], [32])
CS_NUM_WITH([verity-fec-roots], [parity bytes for verity FEC], [2])
dnl Directory holding the LUKS2 lock files; it must be given as an absolute path.
CS_STR_WITH([luks2-lock-path], [path to directory for LUKSv2 locks], [/run/lock/cryptsetup])
test -z "$with_luks2_lock_path" && with_luks2_lock_path=/run/lock/cryptsetup
dnl Use a case pattern for the leading slash test: substring parameter
dnl expansion is a bash extension and breaks configure under a plain POSIX sh.
case "$with_luks2_lock_path" in
	/*) ;;
	*) AC_MSG_ERROR([--with-luks2-lock-path argument must be an absolute path.]) ;;
esac
DEFAULT_LUKS2_LOCK_PATH=$with_luks2_lock_path
AC_SUBST(DEFAULT_LUKS2_LOCK_PATH)
CS_NUM_WITH([luks2-lock-dir-perms], [default luks2 locking directory permissions], [0700])
test -z "$with_luks2_lock_dir_perms" && with_luks2_lock_dir_perms=0700
DEFAULT_LUKS2_LOCK_DIR_PERMS=$with_luks2_lock_dir_perms
AC_SUBST(DEFAULT_LUKS2_LOCK_DIR_PERMS)
dnl ==========================================================================
AC_CONFIG_FILES([ Makefile
......@@ -511,5 +522,7 @@ po/Makefile.in
man/Makefile
tests/Makefile
python/Makefile
scripts/Makefile
scripts/cryptsetup_tmpfiles.conf
])
AC_OUTPUT
LUKS version 2 device locking overview
======================================
(last updated: 2017-09-01)
TODOs:
a) installation tips for distro maintainers
Why
---
Unlike with version 1, LUKS v2 format keeps two identical copies of metadata
stored consecutively at head of metadata device (file or bdev). The metadata
area (both copies) must be updated in a single atomic operation to avoid
some nasty side effects.
With LUKSv1, users usually have clear knowledge of when a LUKS header is
being updated (written to) and when it is only being read, so the need for
locking with the legacy format was not as obvious as it is with the LUKSv2 format.
With LUKSv2 the boundary between read-only and read-write is blurry and what
used to be exclusively read-only operation (i.e. cryptsetup open command) may
easily become read-update operation silently without user's knowledge.
A significant feature of the LUKSv2 format is resilience against accidental
corruption of metadata (e.g. a partial header overwrite by parted or cfdisk
while creating a partition on a mistaken block device).
Such header corruption is detected early on header read and an auto-recovery
procedure takes place (the corrupted header with a checksum mismatch is
replaced by the secondary one if that one is intact).
On current Linux systems a header load operation may be triggered without
direct user intervention, for example by a udev rule or from a systemd service.
Such a clash of header read and auto-recovery procedure could have severe
consequences, in the worst case leaving the LUKSv2 device inaccessible or
broken beyond repair.
The locking of LUKSv2 device headers is split into two categories depending
on what backend the header is stored on:
I) block device
---------------
We perform flock() on file descriptors of files stored in a private
directory (by default /run/lock/cryptsetup). The file name is derived
from the major:minor pair of the affected block device. We recommend
that access to the private locking directory be limited
to the superuser only. For this method to work the distribution needs
to install the locking directory with appropriate access rights.
II) regular files
-----------------
First notable difference between headers stored in a file
vs headers stored in a block device is that headers in a file may be
manipulated by regular user unlike headers on block devices. Therefore
we perform flock() protection on a file with luks2 header directly.
Limitations
-----------
a) In general the locking model provides serialization of I/Os targeting
the header only. It means the header is always written or read at once
while locking is enabled.
We do not suppress any other negative effects that two or more concurrent
writers of the same header may cause.
b) It's obvious but for the sake of clarification: the locking is not
cluster aware in any way.
......@@ -63,6 +63,8 @@ libcryptsetup_la_SOURCES = \
utils_device.c \
utils_keyring.c \
utils_keyring.h \
utils_device_locking.c \
utils_device_locking.h \
utils_pbkdf.c \
libdevmapper.c \
utils_dm.h \
......
......@@ -88,6 +88,7 @@ int device_alloc(struct device **device, const char *path);
int device_alloc_no_check(struct device **device, const char *path);
void device_free(struct device *device);
const char *device_path(const struct device *device);
/* Name of the device below the device-mapper directory, or NULL when it cannot be determined (NULL or uninitialised device, or path outside that directory). */
const char *device_dm_name(const struct device *device);
const char *device_block_path(const struct device *device);
void device_topology_alignment(struct device *device,
unsigned long *required_alignment, /* bytes */
......@@ -101,6 +102,13 @@ void device_disable_direct_io(struct device *device);
int device_is_identical(struct device *device1, struct device *device2);
int device_is_rotational(struct device *device);
size_t device_alignment(struct device *device);
/* Return the device's o_direct flag (whether opens add O_DIRECT). */
int device_direct_io(struct device *device);
/* Open the device for metadata access; asserts that the device lock is held whenever metadata locking is enabled. */
int device_open_locked(struct device *device, int flags);
/*
 * Take a shared (read) or exclusive (write) metadata lock on the device.
 * Both return 0 on success or when metadata locking is disabled, -EBUSY otherwise.
 */
int device_read_lock(struct crypt_device *cd, struct device *device);
int device_write_lock(struct crypt_device *cd, struct device *device);
/*
 * Release a lock taken by the matching *_lock() call; no-ops when metadata
 * locking is disabled. The lock type must match the unlock variant (asserted).
 */
void device_read_unlock(struct device *device);
void device_write_unlock(struct device *device);
/* NOTE(review): presumably the access-check mode passed to device_check_access() (exclusive vs. shared) - confirm against its implementation. */
enum devcheck { DEV_OK = 0, DEV_EXCL = 1, DEV_SHARED = 2 };
int device_check_access(struct crypt_device *cd,
......@@ -152,6 +160,8 @@ int crypt_get_debug_level(void);
int crypt_memlock_inc(struct crypt_device *ctx);
int crypt_memlock_dec(struct crypt_device *ctx);
/* Internal: non-zero while library-wide metadata locking is enabled. */
int crypt_metadata_locking_enabled(void);
int crypt_random_init(struct crypt_device *ctx);
int crypt_random_get(struct crypt_device *ctx, char *buf, size_t len, int quality);
void crypt_random_exit(void);
......
......@@ -276,11 +276,18 @@ void crypt_set_iteration_time(struct crypt_device *cd, uint64_t iteration_time_m
int crypt_memory_lock(struct crypt_device *cd, int lock);
/**
 * Set global lock protection for on-disk metadata (file-based locking).
 *
 * @param cd crypt device handle, can be @e NULL
 * @param enable 0 to disable locking otherwise enable it (default)
 *
 * @returns @e 0 on success or negative errno value otherwise
 *          (-EPERM when asked to enable locking after it was disabled).
 *
 * @note Locking applied only for some metadata formats (LUKS2).
 * @note The switch is global on the library level.
 * In current version locking can be only switched off and cannot be switched on later.
 */
int crypt_metadata_locking(struct crypt_device *cd, int enable);
/**
* Set or unset loading of volume keys via kernel keyring. When set to 'enabled'
......@@ -300,6 +307,13 @@ int crypt_memory_lock(struct crypt_device *cd, int lock);
*/
int crypt_volume_key_keyring(struct crypt_device *cd, int enable);
/**
* @defgroup crypt_type Cryptsetup on-disk format types
*
* Set of functions, \#defines and structs related
* to on-disk format types
*/
/**
* @addtogroup crypt_type
* @{
......
......@@ -11,6 +11,7 @@ CRYPTSETUP_2.0 {
crypt_set_data_device;
crypt_memory_lock;
crypt_metadata_locking;
crypt_format;
crypt_load;
crypt_repair;
......
......@@ -109,6 +109,9 @@ static int _crypto_logged = 0;
static void (*_default_log)(int level, const char *msg, void *usrptr) = NULL;
static int _debug_level = 0;
/*
 * Library-wide metadata locking switch: 1 (default) while locking is enabled,
 * 0 once it has been switched off through crypt_metadata_locking().
 */
static int _metadata_locking = 1;
/* Library scope detection for kernel keyring support */
static int _kernel_keyring_supported;
......@@ -3064,6 +3067,21 @@ int crypt_use_keyring_for_vk(const struct crypt_device *cd)
return (dmc_flags & DM_KERNEL_KEYRING_SUPPORTED);
}
/* Internal only: report the library-wide metadata locking switch (non-zero when enabled). */
int crypt_metadata_locking_enabled(void)
{
	const int enabled = _metadata_locking;

	return enabled;
}
/*
 * Switch library-wide metadata locking on or off.
 *
 * Disabling always succeeds. Enabling succeeds only while locking is still
 * enabled; once it has been switched off, a request to enable it again is
 * refused with -EPERM. The cd argument is not used.
 */
int crypt_metadata_locking(struct crypt_device *cd, int enable)
{
	if (!enable) {
		_metadata_locking = 0;
		return 0;
	}

	/* locking was already switched off: it cannot be turned back on */
	if (!_metadata_locking)
		return -EPERM;

	_metadata_locking = 1;
	return 0;
}
static void __attribute__((destructor)) libcryptsetup_exit(void)
{
crypt_backend_destroy();
......
......@@ -21,6 +21,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
......@@ -37,6 +38,7 @@
# include <sys/statvfs.h>
#endif
#include "internal.h"
#include "utils_device_locking.h"
struct device {
char *path;
......@@ -44,6 +46,8 @@ struct device {
char *file_path;
int loop_fd;
struct crypt_lock_handle *lh;
int o_direct:1;
int init_done:1;
......@@ -194,7 +198,41 @@ static int device_ready(struct device *device)
return r;
}
int device_open(struct device *device, int flags)
/*
 * Open the device while its metadata lock is held.
 *
 * Returns the opened fd, -EAGAIN when write access is requested while only a
 * read lock is held, -errno when open() fails, or -EINVAL when the opened fd
 * does not correspond to the locked resource.
 */
static int _open_locked(struct device *device, int flags)
{
	int devfd;

	log_dbg("Opening locked device %s", device_path(device));

	if ((flags & O_ACCMODE) != O_RDONLY) {
		if (device_locked_readonly(device->lh)) {
			log_dbg("Can not open locked device %s in write mode. Read lock held.", device_path(device));
			return -EAGAIN;
		}
	}

	devfd = open(device_path(device), flags);
	if (devfd < 0)
		return -errno;

	if (!device_locked_verify(devfd, device->lh))
		return devfd;

	/* fd doesn't correspond to a locked resource */
	close(devfd);
	log_dbg("Failed to verify lock resource for device %s.", device_path(device));

	return -EINVAL;
}
/*
 * in non-locked mode always returns fd or -1
 *
 * in locked mode:
 * opened fd or one of:
 * -EAGAIN : write mode requested while the device is held under a shared (read) lock
 * -EINVAL : invalid lock fd state
 * -errno : all other errors (negative errno from the failed open())
 */
static int device_open_internal(struct device *device, int flags)
{
int devfd;
......@@ -202,7 +240,10 @@ int device_open(struct device *device, int flags)
if (device->o_direct)
flags |= O_DIRECT;
devfd = open(device_path(device), flags);
if (device_locked(device->lh))
devfd = _open_locked(device, flags);
else
devfd = open(device_path(device), flags);
if (devfd < 0)
log_dbg("Cannot open device %s.", device_path(device));
......@@ -210,6 +251,18 @@ int device_open(struct device *device, int flags)
return devfd;
}
/*
 * Open a device that is not under a metadata lock (asserted).
 * Returns whatever device_open_internal() returns.
 */
int device_open(struct device *device, int flags)
{
	int devfd;

	assert(!device_locked(device->lh));

	devfd = device_open_internal(device, flags);
	return devfd;
}
/*
 * Open a device for metadata access. When metadata locking is enabled the
 * device lock must already be held (asserted).
 * Returns whatever device_open_internal() returns.
 */
int device_open_locked(struct device *device, int flags)
{
	int devfd;

	assert(!crypt_metadata_locking_enabled() || device_locked(device->lh));

	devfd = device_open_internal(device, flags);
	return devfd;
}
/* Avoid any read from device, expects direct-io to work. */
int device_alloc_no_check(struct device **device, const char *path)
{
......@@ -273,6 +326,8 @@ void device_free(struct device *device)
close(device->loop_fd);
}
assert (!device_locked(device->lh));
free(device->file_path);
free(device->path);
free(device);
......@@ -287,6 +342,21 @@ const char *device_block_path(const struct device *device)
return device->path;
}
/*
 * Get device-mapper name of device (if possible).
 *
 * Returns a pointer into device->path just past "<dm dir>/", or NULL when
 * the device is NULL, not initialised, or its path does not start with the
 * device-mapper directory followed by a '/' separator.
 *
 * Assumes dm_get_dir() returns the directory without a trailing slash
 * (the +1 offset below already relied on that).
 */
const char *device_dm_name(const struct device *device)
{
	const char *dmdir = dm_get_dir();
	size_t dmdir_len = strlen(dmdir);

	if (!device || !device->init_done)
		return NULL;

	if (strncmp(device->path, dmdir, dmdir_len))
		return NULL;

	/*
	 * The prefix must be followed by a path separator. Without this check a
	 * path equal to the dm directory would make the +1 below step past the
	 * terminating NUL, and a sibling such as "<dm dir>foo" would be
	 * misreported as a device-mapper name.
	 */
	if (device->path[dmdir_len] != '/')
		return NULL;

	return &device->path[dmdir_len + 1];
}
/* Get path to device / file */
const char *device_path(const struct device *device)
{
......@@ -625,6 +695,11 @@ void device_disable_direct_io(struct device *device)
device->o_direct = 0;
}
/* Report the device's o_direct flag exactly as stored in the device structure. */
int device_direct_io(struct device *device)
{
	const int direct = device->o_direct;

	return direct;
}
int device_is_identical(struct device *device1, struct device *device2)
{
if (device1 == device2)
......@@ -667,3 +742,65 @@ size_t device_alignment(struct device *device)
return device->alignment;
}
/*
 * Take a shared (read) metadata lock on the device.
 *
 * Does nothing and returns 0 when metadata locking is disabled. The device
 * must not be locked yet (asserted). Returns 0 once the lock handle is stored
 * in the device, -EBUSY when no lock handle could be obtained.
 */
int device_read_lock(struct crypt_device *cd, struct device *device)
{
	if (!crypt_metadata_locking_enabled())
		return 0;

	assert(!device_locked(device->lh));

	device->lh = device_read_lock_handle(cd, device_path(device));
	if (!device_locked(device->lh))
		return -EBUSY;

	log_dbg("Device %s READ lock taken.", device_path(device));
	return 0;
}
/*
 * Take an exclusive (write) metadata lock on the device.
 *
 * Does nothing and returns 0 when metadata locking is disabled. The device
 * must not be locked yet (asserted). Returns 0 once the lock handle is stored
 * in the device, -EBUSY when no lock handle could be obtained.
 */
int device_write_lock(struct crypt_device *cd, struct device *device)
{
	if (!crypt_metadata_locking_enabled())
		return 0;

	assert(!device_locked(device->lh));

	device->lh = device_write_lock_handle(cd, device_path(device));
	if (!device_locked(device->lh))
		return -EBUSY;

	log_dbg("Device %s WRITE lock taken.", device_path(device));
	return 0;
}
/*
 * Release a read lock held on the device and clear the stored handle.
 * No-op when metadata locking is disabled; otherwise a read lock must be
 * held (asserted).
 */
void device_read_unlock(struct device *device)
{
	if (crypt_metadata_locking_enabled()) {
		assert(device_locked(device->lh) && device_locked_readonly(device->lh));

		/* device_unlock_handle() also frees the handle */
		device_unlock_handle(device->lh);
		log_dbg("Device %s READ lock released.", device_path(device));
		device->lh = NULL;
	}
}
/*
 * Release a write lock held on the device and clear the stored handle.
 * No-op when metadata locking is disabled; otherwise a write lock must be
 * held (asserted).
 */
void device_write_unlock(struct device *device)
{
	if (crypt_metadata_locking_enabled()) {
		assert(device_locked(device->lh) && !device_locked_readonly(device->lh));

		/* device_unlock_handle() also frees the handle */
		device_unlock_handle(device->lh);
		log_dbg("Device %s WRITE lock released.", device_path(device));
		device->lh = NULL;
	}
}
/*
* Metadata on-disk locking for processes serialization
*
* Copyright (C) 2016-2017, Red Hat, Inc. All rights reserved.
* Copyright (C) 2016-2017, Ondrej Kozina. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <errno.h>
#include <fcntl.h>
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#ifdef HAVE_SYS_SYSMACROS_H
# include <sys/sysmacros.h> /* for major, minor */
#endif
#include <libgen.h>
#include "internal.h"
#include "utils_device_locking.h"
/* True when both stat buffers describe the same file: same device and same inode number. */
#define same_inode(buf1, buf2) \
	(((buf1).st_dev == (buf2).st_dev) && \
	 ((buf1).st_ino == (buf2).st_ino))
/* Fallback for compilers that do not define __GNUC__: map __typeof__ onto a plain typeof keyword. */
#ifndef __GNUC__
# define __typeof__ typeof
#endif
/* Kind of lock recorded in a lock handle. */
enum lock_type {
	DEV_LOCK_READ  = 0,	/* set by device_read_lock_handle() after a LOCK_SH flock */
	DEV_LOCK_WRITE = 1	/* set by device_write_lock_handle() after a LOCK_EX flock */
};
/* State of one metadata lock (held, or in the middle of being acquired). */
struct crypt_lock_handle {
	dev_t devno;		/* device number; filled in for block devices only */
	int flock_fd;		/* descriptor the flock() is applied to */
	enum lock_type type;	/* DEV_LOCK_READ or DEV_LOCK_WRITE once the lock is taken */
	mode_t mode;		/* st_mode of the locked device or file */
};
/*
 * Build the lock resource name for a block device number.
 *
 * @param res       output buffer
 * @param res_size  size of the output buffer in bytes
 * @param devno     device number of the block device
 * @param fullpath  non-zero: "<DEFAULT_LUKS2_LOCK_PATH>/L_<major>:<minor>",
 *                  zero: just "L_<major>:<minor>"
 *
 * Returns 0 on success, -EINVAL when formatting fails or the name does not
 * fit into the buffer.
 */
static int resource_by_devno(char *res, size_t res_size, dev_t devno, unsigned fullpath)
{
	/* major()/minor() yield unsigned values; print them with a matching %u */
	const unsigned dev_major = (unsigned)major(devno);
	const unsigned dev_minor = (unsigned)minor(devno);
	int r;

	if (fullpath)
		r = snprintf(res, res_size, "%s/L_%u:%u", DEFAULT_LUKS2_LOCK_PATH, dev_major, dev_minor);
	else
		r = snprintf(res, res_size, "L_%u:%u", dev_major, dev_minor);

	/* a return value >= res_size means the output was truncated */
	return (r < 0 || (size_t)r >= res_size) ? -EINVAL : 0;
}
/*
 * Open the locking directory "base" located inside directory "dir".
 *
 * When "base" does not exist a warning is printed and the directory is
 * created with DEFAULT_LUKS2_LOCK_DIR_PERMS (losing a creation race with
 * EEXIST is fine), then opened again.
 *
 * Returns a descriptor of the locking directory or -EINVAL on any failure.
 */
static int open_lock_dir(struct crypt_device *cd, const char *dir, const char *base)
{
	int parent_fd, lock_fd;

	parent_fd = open(dir, O_RDONLY | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC);
	if (parent_fd < 0) {
		log_dbg("Failed to open directory '%s': (%d: %s).", dir, errno, strerror(errno));
		return -EINVAL;
	}

	lock_fd = openat(parent_fd, base, O_RDONLY | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
	if (lock_fd < 0 && errno != ENOENT) {
		log_dbg("Failed to open directory %s/%s: (%d: %s)", dir, base, errno, strerror(errno));
	} else if (lock_fd < 0) {
		log_std(cd, _("WARNING: Locking directory %s/%s is missing!\n"), dir, base);

		/* success or failure w/ errno == EEXIST either way just try to open the 'base' directory again */
		if (mkdirat(parent_fd, base, DEFAULT_LUKS2_LOCK_DIR_PERMS) && errno != EEXIST) {
			log_dbg("Failed to create directory %s in %s (%d: %s).", base, dir, errno, strerror(errno));
		} else {
			lock_fd = openat(parent_fd, base, O_RDONLY | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
		}
	}

	close(parent_fd);

	if (lock_fd < 0)
		return -EINVAL;

	return lock_fd;
}
/*
 * Open (creating it if needed) the lock resource file "res" inside the
 * locking directory DEFAULT_LUKS2_LOCK_PATH.
 *
 * Returns the resource file descriptor, -EINVAL when the locking directory
 * cannot be opened, or -errno from the failed openat().
 */
static int open_resource(struct crypt_device *cd, const char *res)
{
	/* dirname() and basename() may modify their argument, so each gets its own copy */
	char parent[] = DEFAULT_LUKS2_LOCK_PATH;
	char leaf[] = DEFAULT_LUKS2_LOCK_PATH;
	int dir_fd, res_fd, saved_errno;

	dir_fd = open_lock_dir(cd, dirname(parent), basename(leaf));
	if (dir_fd < 0)
		return -EINVAL;

	log_dbg("Opening lock resource file %s/%s", DEFAULT_LUKS2_LOCK_PATH, res);
	res_fd = openat(dir_fd, res, O_CREAT | O_NOFOLLOW | O_RDWR | O_CLOEXEC, 0777);
	/* remember errno now, close() below may overwrite it */
	saved_errno = errno;

	close(dir_fd);

	if (res_fd < 0)
		return -saved_errno;

	return res_fd;
}
/*
 * Open the lock resource for device_path and record it in handle h.
 *
 * Block device: the resource is a file named after the device's major:minor
 * numbers, opened through open_resource(); h->devno stores the device number.
 * Regular file: the file itself serves as the lock resource.
 * Any other file type is rejected.
 *
 * On success h->flock_fd and h->mode are set and 0 is returned. No flock() is
 * taken here. On failure returns -EINVAL or the negative value reported by
 * open_resource().
 */
static int acquire_lock_handle(struct crypt_device *cd, const char *device_path, struct crypt_lock_handle *h)
{
char res[PATH_MAX];
int dev_fd, fd;
struct stat st;
/* O_NONBLOCK read-only open, used to fstat() the target and find out its type */
dev_fd = open(device_path, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
if (dev_fd < 0)
return -EINVAL;
if (fstat(dev_fd, &st)) {
close(dev_fd);
return -EINVAL;
}
if (S_ISBLK(st.st_mode)) {
/* relative resource name only; open_resource() resolves it inside the lock directory */
if (resource_by_devno(res, sizeof(res), st.st_rdev, 0)) {
close(dev_fd);
return -EINVAL;
}
fd = open_resource(cd, res);
/* the device descriptor is not needed any more, the lock lives on the resource file */
close(dev_fd);
if (fd < 0)
return fd;
h->flock_fd = fd;
h->devno = st.st_rdev;
} else if (S_ISREG(st.st_mode)) {
// FIXME: workaround for nfsv4
/* prefer a read-write descriptor; fall back to the read-only one when that open fails */
fd = open(device_path, O_RDWR | O_NONBLOCK | O_CLOEXEC);
if (fd < 0)
h->flock_fd = dev_fd;
else {
h->flock_fd = fd;
close(dev_fd);
}
} else {
/* Wrong device type */
close(dev_fd);
return -EINVAL;
}
/* remembered so later code can tell block-device handles from regular-file handles */
h->mode = st.st_mode;
return 0;
}
/*
 * Drop the lock resource referenced by h and close its descriptor.
 *
 * For block devices the resource file is removed when possible: the file is
 * unlinked only if a non-blocking exclusive flock() succeeds and the resource
 * path still refers to the same inode as h->flock_fd. Every step of the chain
 * below short-circuits, so any failure simply skips the unlink.
 *
 * The descriptor is closed in all cases. The handle itself is not freed here.
 */
static void release_lock_handle(struct crypt_lock_handle *h)
{
char res[PATH_MAX];
struct stat buf_a, buf_b;
if (S_ISBLK(h->mode) && /* was it block device */
!flock(h->flock_fd, LOCK_EX | LOCK_NB) && /* lock to drop the file */
!resource_by_devno(res, sizeof(res), h->devno, 1) && /* acquire lock resource name */
!fstat(h->flock_fd, &buf_a) && /* read inode id referred to by fd */
!stat(res, &buf_b) && /* does path file still exist? */
same_inode(buf_a, buf_b)) /* is it same id as the one referenced by fd? */
if (unlink(res)) /* yes? unlink the file */
log_dbg("Failed to unlink resource file: %s", res);
/* not part of the condition above: the descriptor is always closed */
if (close(h->flock_fd))
log_dbg("Failed to close resource fd (%d).", h->flock_fd);
}
/* Return 1 when h is a non-NULL handle holding a read or a write lock, 0 otherwise. */
int device_locked(struct crypt_lock_handle *h)
{
	if (!h)
		return 0;

	return h->type == DEV_LOCK_READ || h->type == DEV_LOCK_WRITE;
}
/* Return 1 when h is a non-NULL handle holding a read lock, 0 otherwise. */
int device_locked_readonly(struct crypt_lock_handle *h)
{
	if (!h)
		return 0;

	return h->type == DEV_LOCK_READ;
}
/*
 * Check that the resource file path of a block-device lock still refers to
 * the inode that h->flock_fd has open.
 *
 * Returns 0 when the handle is valid (always for regular files), -EAGAIN when
 * the resource path is gone or points at a different inode, -EINVAL when the
 * resource name cannot be built or the descriptor cannot be stat'ed.
 * The device_path argument is not used.
 */
static int verify_lock_handle(const char *device_path, struct crypt_lock_handle *h)
{
	char res[PATH_MAX];
	struct stat lck_st, res_st;

	/* we locked a regular file, check during device_open() instead. No reason to check now */
	if (S_ISREG(h->mode))
		return 0;

	if (resource_by_devno(res, sizeof(res), h->devno, 1))
		return -EINVAL;

	if (fstat(h->flock_fd, &lck_st))
		return -EINVAL;

	/* resource file vanished in the meantime */
	if (stat(res, &res_st))
		return -EAGAIN;

	return same_inode(lck_st, res_st) ? 0 : -EAGAIN;
}
/*
 * Allocate a lock handle and take a shared (LOCK_SH) flock() on the lock
 * resource of device_path.
 *
 * The acquire / flock / verify sequence is repeated for as long as
 * verify_lock_handle() reports -EAGAIN, i.e. the resource file no longer
 * matches the descriptor that was locked.
 *
 * Returns the handle with type DEV_LOCK_READ, or NULL on allocation failure
 * or any other error. The caller releases it with device_unlock_handle().
 */
struct crypt_lock_handle *device_read_lock_handle(struct crypt_device *cd, const char *device_path)
{
int r;
struct crypt_lock_handle *h = malloc(sizeof(*h));
if (!h)
return NULL;
do {
r = acquire_lock_handle(cd, device_path, h);
if (r)
break;
log_dbg("Acquiring read lock for device %s.", device_path);
/* LOCK_NB is not set here */
if (flock(h->flock_fd, LOCK_SH)) {
log_dbg("Shared flock failed with errno %d.", errno);
r = -EINVAL;
release_lock_handle(h);
break;
}
log_dbg("Verifying read lock handle for device %s.", device_path);
/*
 * check whether another libcryptsetup process removed resource file before this
 * one managed to flock() it. See release_lock_handle() for details
 */
r = verify_lock_handle(device_path, h);
if (r) {
/* drop the lock and the descriptor before retrying or giving up */
flock(h->flock_fd, LOCK_UN);
release_lock_handle(h);
log_dbg("Read lock handle verification failed.");
}
} while (r == -EAGAIN);
if (r) {
free(h);
return NULL;
}
/* the type is set only once the lock is held and verified */
h->type = DEV_LOCK_READ;
return h;
}
/*
 * Allocate a lock handle and take an exclusive (LOCK_EX) flock() on the lock
 * resource of device_path.
 *
 * The acquire / flock / verify sequence is repeated for as long as
 * verify_lock_handle() reports -EAGAIN, i.e. the resource file no longer
 * matches the descriptor that was locked.
 *
 * Returns the handle with type DEV_LOCK_WRITE, or NULL on allocation failure
 * or any other error. The caller releases it with device_unlock_handle().
 */
struct crypt_lock_handle *device_write_lock_handle(struct crypt_device *cd, const char *device_path)
{
int r;
struct crypt_lock_handle *h = malloc(sizeof(*h));
if (!h)
return NULL;
do {
r = acquire_lock_handle(cd, device_path, h);
if (r)
break;
log_dbg("Acquiring write lock for device %s.", device_path);
/* LOCK_NB is not set here */
if (flock(h->flock_fd, LOCK_EX)) {
log_dbg("Exclusive flock failed with errno %d.", errno);
r = -EINVAL;
release_lock_handle(h);
break;
}
log_dbg("Verifying write lock handle for device %s.", device_path);
/*
 * check whether another libcryptsetup process removed resource file before this
 * one managed to flock() it. See release_lock_handle() for details
 */
r = verify_lock_handle(device_path, h);
if (r) {
/* drop the lock and the descriptor before retrying or giving up */
flock(h->flock_fd, LOCK_UN);
release_lock_handle(h);
log_dbg("Write lock handle verification failed.");
}
} while (r == -EAGAIN);
if (r) {
free(h);
return NULL;
}
/* the type is set only once the lock is held and verified */
h->type = DEV_LOCK_WRITE;
return h;
}
/*
 * Unlock the resource held by h, release it (see release_lock_handle())
 * and free the handle. A failing unlock is only logged.
 */
void device_unlock_handle(struct crypt_lock_handle *h)
{
	const int rc = flock(h->flock_fd, LOCK_UN);

	if (rc != 0)
		log_dbg("flock on fd %d failed.", h->flock_fd);

	release_lock_handle(h);
	free(h);
}
int device_locked_verify(int dev_fd, struct crypt_lock_handle *h)
{
char res[PATH_MAX];
struct stat dev_st, lck_st, st;
if (fstat(dev_fd, &dev_st) || fstat(h->flock_fd, &lck_st))
return 1;