Commit 4584ae96, authored by David Howells, committed by Al Viro

afs: Fix missing net error handling

kAFS can be given certain network errors (EADDRNOTAVAIL, EHOSTDOWN and
ERFKILL) that it doesn't handle in its server/address rotation algorithms.
They cause the probing and rotation to abort immediately rather than
rotating.

Fix this by:

 (1) Abstracting out the error prioritisation from the VL and FS rotation
     algorithms into a common function and expanding its usage into the
     server probing code.

     When multiple errors are available, this code selects the one we'd
     prefer to return.

 (2) Adding handling for EADDRNOTAVAIL, EHOSTDOWN and ERFKILL.

Fixes: 0fafdc9f ("afs: Fix file locking")
Fixes: 0338747d8454 ("afs: Probe multiple fileservers simultaneously")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent ae3b7361
......@@ -61,8 +61,11 @@ void afs_fileserver_probe_result(struct afs_call *call)
afs_io_error(call, afs_io_error_fs_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
......@@ -132,12 +135,14 @@ void afs_fileserver_probe_result(struct afs_call *call)
static int afs_do_probe_fileserver(struct afs_net *net,
struct afs_server *server,
struct key *key,
unsigned int server_index)
unsigned int server_index,
struct afs_error *_e)
{
struct afs_addr_cursor ac = {
.index = 0,
};
int ret;
bool in_progress = false;
int err;
_enter("%pU", &server->uuid);
......@@ -151,15 +156,17 @@ static int afs_do_probe_fileserver(struct afs_net *net,
server->probe.rtt = UINT_MAX;
for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
err = afs_fs_get_capabilities(net, server, &ac, key, server_index,
true);
if (ret != -EINPROGRESS) {
afs_fs_probe_done(server);
return ret;
}
if (err == -EINPROGRESS)
in_progress = true;
else
afs_prioritise_error(_e, err, ac.abort_code);
}
return 0;
if (!in_progress)
afs_fs_probe_done(server);
return in_progress;
}
/*
......@@ -169,21 +176,23 @@ int afs_probe_fileservers(struct afs_net *net, struct key *key,
struct afs_server_list *list)
{
struct afs_server *server;
int i, ret;
struct afs_error e;
bool in_progress = false;
int i;
e.error = 0;
e.responded = false;
for (i = 0; i < list->nr_servers; i++) {
server = list->servers[i].server;
if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
continue;
if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
ret = afs_do_probe_fileserver(net, server, key, i);
if (ret)
return ret;
}
if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags) &&
afs_do_probe_fileserver(net, server, key, i, &e))
in_progress = true;
}
return 0;
return in_progress ? 0 : e.error;
}
/*
......
......@@ -695,6 +695,14 @@ struct afs_interface {
unsigned mtu; /* MTU of interface */
};
/*
 * Error prioritisation and accumulation.
 *
 * Holds the error selected so far while a caller feeds a series of errors
 * through afs_prioritise_error().  The callers visible alongside this
 * definition set both fields explicitly (error to 0 or -EDESTADDRREQ,
 * responded to false) before the first call.
 */
struct afs_error {
short error; /* Accumulated error: the preferred error seen so far */
bool responded; /* T if server responded; set when -ECONNABORTED is accumulated */
};
/*
* Cursor for iterating over a server's address list.
*/
......@@ -1015,6 +1023,7 @@ static inline void __afs_stat(atomic_t *s)
* misc.c
*/
extern int afs_abort_to_error(u32);
extern void afs_prioritise_error(struct afs_error *, int, u32);
/*
* mntpt.c
......
......@@ -118,3 +118,55 @@ int afs_abort_to_error(u32 abort_code)
default: return -EREMOTEIO;
}
}
/*
 * Rank an error for afs_prioritise_error().  An error with a higher rank is
 * kept in preference to one with a lower rank; anything not listed here
 * shares the lowest rank.
 */
static int afs_error_rank(int error)
{
	switch (error) {
	case -ETIMEDOUT:
	case -ETIME:
		return 1;
	case -ENOMEM:
	case -ENONET:
		return 2;
	case -ERFKILL:
		return 3;
	case -EADDRNOTAVAIL:
		return 4;
	case -ENETUNREACH:
		return 5;
	case -EHOSTUNREACH:
		return 6;
	case -EHOSTDOWN:
		return 7;
	case -ECONNREFUSED:
		return 8;
	case -ECONNRESET: /* Responded, but call expired. */
		return 9;
	default:
		return 0;
	}
}

/*
 * Select the error to report from a set of errors.
 *
 * @e accumulates the choice across successive calls, @error is the next
 * candidate and @abort_code is only consulted when @error is -ECONNABORTED.
 *
 *  - A candidate of 0 is ignored.
 *  - -ECONNABORTED always wins: the server is marked as having responded
 *    and the abort code is translated with afs_abort_to_error().
 *  - Any other candidate replaces the accumulated error unless the
 *    accumulated error outranks it or a response has already been recorded.
 *    A candidate of equal rank replaces the accumulated error.
 */
void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
{
	if (error == 0)
		return;

	if (error == -ECONNABORTED) {
		e->responded = true;
		e->error = afs_abort_to_error(abort_code);
		return;
	}

	/* Keep what we already hold if it is the more useful thing to report. */
	if (afs_error_rank(e->error) > afs_error_rank(error))
		return;

	/* An abort from a server trumps every local or network error. */
	if (e->responded)
		return;

	e->error = error;
}
......@@ -136,7 +136,8 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
struct afs_addr_list *alist;
struct afs_server *server;
struct afs_vnode *vnode = fc->vnode;
u32 rtt, abort_code;
struct afs_error e;
u32 rtt;
int error = fc->ac.error, i;
_enter("%lx[%d],%lx[%d],%d,%d",
......@@ -306,8 +307,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
if (fc->error != -EDESTADDRREQ)
goto iterate_address;
/* Fall through */
case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EHOSTDOWN:
case -ECONNREFUSED:
_debug("no conn");
fc->error = error;
......@@ -446,50 +450,15 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
if (fc->flags & AFS_FS_CURSOR_VBUSY)
goto restart_from_beginning;
abort_code = 0;
error = -EDESTADDRREQ;
e.error = -EDESTADDRREQ;
e.responded = false;
for (i = 0; i < fc->server_list->nr_servers; i++) {
struct afs_server *s = fc->server_list->servers[i].server;
int probe_error = READ_ONCE(s->probe.error);
switch (probe_error) {
case 0:
continue;
default:
if (error == -ETIMEDOUT ||
error == -ETIME)
continue;
case -ETIMEDOUT:
case -ETIME:
if (error == -ENOMEM ||
error == -ENONET)
continue;
case -ENOMEM:
case -ENONET:
if (error == -ENETUNREACH)
continue;
case -ENETUNREACH:
if (error == -EHOSTUNREACH)
continue;
case -EHOSTUNREACH:
if (error == -ECONNREFUSED)
continue;
case -ECONNREFUSED:
if (error == -ECONNRESET)
continue;
case -ECONNRESET: /* Responded, but call expired. */
if (error == -ECONNABORTED)
continue;
case -ECONNABORTED:
abort_code = s->probe.abort_code;
error = probe_error;
continue;
}
afs_prioritise_error(&e, READ_ONCE(s->probe.error),
s->probe.abort_code);
}
if (error == -ECONNABORTED)
error = afs_abort_to_error(abort_code);
failed_set_error:
fc->error = error;
failed:
......@@ -553,8 +522,11 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
_leave(" = f [abort]");
return false;
case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
......@@ -633,6 +605,7 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
struct afs_net *net = afs_v2net(fc->vnode);
if (fc->error == -EDESTADDRREQ ||
fc->error == -EADDRNOTAVAIL ||
fc->error == -ENETUNREACH ||
fc->error == -EHOSTUNREACH)
afs_dump_edestaddrreq(fc);
......
......@@ -61,8 +61,11 @@ void afs_vlserver_probe_result(struct afs_call *call)
afs_io_error(call, afs_io_error_vl_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
......@@ -129,15 +132,17 @@ void afs_vlserver_probe_result(struct afs_call *call)
* Probe all of a vlserver's addresses to find out the best route and to
* query its capabilities.
*/
static int afs_do_probe_vlserver(struct afs_net *net,
struct afs_vlserver *server,
struct key *key,
unsigned int server_index)
static bool afs_do_probe_vlserver(struct afs_net *net,
struct afs_vlserver *server,
struct key *key,
unsigned int server_index,
struct afs_error *_e)
{
struct afs_addr_cursor ac = {
.index = 0,
};
int ret;
bool in_progress = false;
int err;
_enter("%s", server->name);
......@@ -151,15 +156,17 @@ static int afs_do_probe_vlserver(struct afs_net *net,
server->probe.rtt = UINT_MAX;
for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
ret = afs_vl_get_capabilities(net, &ac, key, server,
err = afs_vl_get_capabilities(net, &ac, key, server,
server_index, true);
if (ret != -EINPROGRESS) {
afs_vl_probe_done(server);
return ret;
}
if (err == -EINPROGRESS)
in_progress = true;
else
afs_prioritise_error(_e, err, ac.abort_code);
}
return 0;
if (!in_progress)
afs_vl_probe_done(server);
return in_progress;
}
/*
......@@ -169,21 +176,23 @@ int afs_send_vl_probes(struct afs_net *net, struct key *key,
struct afs_vlserver_list *vllist)
{
struct afs_vlserver *server;
int i, ret;
struct afs_error e;
bool in_progress = false;
int i;
e.error = 0;
e.responded = false;
for (i = 0; i < vllist->nr_servers; i++) {
server = vllist->servers[i].server;
if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
continue;
if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
ret = afs_do_probe_vlserver(net, server, key, i);
if (ret)
return ret;
}
if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags) &&
afs_do_probe_vlserver(net, server, key, i, &e))
in_progress = true;
}
return 0;
return in_progress ? 0 : e.error;
}
/*
......
......@@ -71,8 +71,9 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
{
struct afs_addr_list *alist;
struct afs_vlserver *vlserver;
struct afs_error e;
u32 rtt;
int error = vc->ac.error, abort_code, i;
int error = vc->ac.error, i;
_enter("%lx[%d],%lx[%d],%d,%d",
vc->untried, vc->index,
......@@ -119,8 +120,11 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
goto failed;
}
case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
......@@ -235,50 +239,15 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
if (vc->flags & AFS_VL_CURSOR_RETRY)
goto restart_from_beginning;
abort_code = 0;
error = -EDESTADDRREQ;
e.error = -EDESTADDRREQ;
e.responded = false;
for (i = 0; i < vc->server_list->nr_servers; i++) {
struct afs_vlserver *s = vc->server_list->servers[i].server;
int probe_error = READ_ONCE(s->probe.error);
switch (probe_error) {
case 0:
continue;
default:
if (error == -ETIMEDOUT ||
error == -ETIME)
continue;
case -ETIMEDOUT:
case -ETIME:
if (error == -ENOMEM ||
error == -ENONET)
continue;
case -ENOMEM:
case -ENONET:
if (error == -ENETUNREACH)
continue;
case -ENETUNREACH:
if (error == -EHOSTUNREACH)
continue;
case -EHOSTUNREACH:
if (error == -ECONNREFUSED)
continue;
case -ECONNREFUSED:
if (error == -ECONNRESET)
continue;
case -ECONNRESET: /* Responded, but call expired. */
if (error == -ECONNABORTED)
continue;
case -ECONNABORTED:
abort_code = s->probe.abort_code;
error = probe_error;
continue;
}
afs_prioritise_error(&e, READ_ONCE(s->probe.error),
s->probe.abort_code);
}
if (error == -ECONNABORTED)
error = afs_abort_to_error(abort_code);
failed_set_error:
vc->error = error;
failed:
......@@ -341,6 +310,7 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
struct afs_net *net = vc->cell->net;
if (vc->error == -EDESTADDRREQ ||
vc->error == -EADDRNOTAVAIL ||
vc->error == -ENETUNREACH ||
vc->error == -EHOSTUNREACH)
afs_vl_dump_edestaddrreq(vc);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment