Commit 5d6b0cb3 authored by Denis Drozdov's avatar Denis Drozdov Committed by Saeed Mahameed

RDMA/netdev: Fix netlink support in IPoIB

IPoIB netlink support was broken by the below commit since integrating
the rdma_netdev support relies on an allocation flow for netdevs that
was controlled by the ipoib driver while netdev's rtnl_newlink
implementation assumes that the netdev will be allocated by netlink.
Such situation leads to crash in __ipoib_device_add, once trying to
reuse netlink device.

This patch fixes the kernel oops for both mlx4 and mlx5
devices triggered by the following command:

Fixes: cd565b4b ("IB/IPoIB: Support acceleration options callbacks")
Signed-off-by: default avatarDenis Drozdov <denisd@mellanox.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
Signed-off-by: default avatarFeras Daoud <ferasda@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
parent f6a8a19b
......@@ -2643,13 +2643,27 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
if (!netdev)
return ERR_PTR(-ENOMEM);
rc = params.initialize_rdma_netdev(device, port_num, netdev,
params.param);
if (rc) {
free_netdev(netdev);
return ERR_PTR(rc);
}
return netdev;
}
EXPORT_SYMBOL(rdma_alloc_netdev);
int rdma_init_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
struct net_device *netdev)
{
struct rdma_netdev_alloc_params params;
int rc;
if (!device->rdma_netdev_get_params)
return -EOPNOTSUPP;
rc = device->rdma_netdev_get_params(device, port_num, type, &params);
if (rc)
return rc;
return params.initialize_rdma_netdev(device, port_num,
netdev, params.param);
}
EXPORT_SYMBOL(rdma_init_netdev);
......@@ -499,8 +499,10 @@ void ipoib_reap_ah(struct work_struct *work);
struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
const char *format);
struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port,
const char *format);
int ipoib_intf_init(struct ib_device *hca, u8 port, const char *format,
struct net_device *dev);
void ipoib_ib_tx_timer_func(struct timer_list *t);
void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work);
......@@ -531,6 +533,8 @@ int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv,
struct ipoib_tx_buf *tx_req);
struct rtnl_link_ops *ipoib_get_link_ops(void);
static inline void ipoib_build_sge(struct ipoib_dev_priv *priv,
struct ipoib_tx_buf *tx_req)
{
......
......@@ -2115,77 +2115,58 @@ static const struct net_device_ops ipoib_netdev_default_pf = {
.ndo_stop = ipoib_ib_dev_stop_default,
};
static struct net_device
*ipoib_create_netdev_default(struct ib_device *hca,
const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *))
{
struct net_device *dev;
struct rdma_netdev *rn;
dev = alloc_netdev((int)sizeof(struct rdma_netdev),
name,
name_assign_type, setup);
if (!dev)
return NULL;
rn = netdev_priv(dev);
rn->send = ipoib_send;
rn->attach_mcast = ipoib_mcast_attach;
rn->detach_mcast = ipoib_mcast_detach;
rn->hca = hca;
dev->netdev_ops = &ipoib_netdev_default_pf;
return dev;
}
static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
const char *name)
static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port,
const char *name)
{
struct net_device *dev;
dev = rdma_alloc_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
NET_NAME_UNKNOWN, ipoib_setup_common);
if (!IS_ERR(dev))
if (!IS_ERR(dev) || PTR_ERR(dev) != -EOPNOTSUPP)
return dev;
if (PTR_ERR(dev) != -EOPNOTSUPP)
return NULL;
return ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
ipoib_setup_common);
dev = alloc_netdev(sizeof(struct rdma_netdev), name, NET_NAME_UNKNOWN,
ipoib_setup_common);
if (!dev)
return ERR_PTR(-ENOMEM);
return dev;
}
struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
const char *name)
int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name,
struct net_device *dev)
{
struct net_device *dev;
struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_dev_priv *priv;
struct rdma_netdev *rn;
int rc;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return NULL;
return -ENOMEM;
priv->ca = hca;
priv->port = port;
dev = ipoib_get_netdev(hca, port, name);
if (!dev)
goto free_priv;
rc = rdma_init_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
NET_NAME_UNKNOWN, ipoib_setup_common, dev);
if (rc) {
if (rc != -EOPNOTSUPP)
goto out;
dev->netdev_ops = &ipoib_netdev_default_pf;
rn->send = ipoib_send;
rn->attach_mcast = ipoib_mcast_attach;
rn->detach_mcast = ipoib_mcast_detach;
rn->hca = hca;
}
priv->rn_ops = dev->netdev_ops;
/* fixme : should be after the query_cap */
if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
dev->netdev_ops = &ipoib_netdev_ops_vf;
else
dev->netdev_ops = &ipoib_netdev_ops_pf;
rn = netdev_priv(dev);
rn->clnt_priv = priv;
/*
* Only the child register_netdev flows can handle priv_destructor
* being set, so we force it to NULL here and handle manually until it
......@@ -2196,10 +2177,35 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
ipoib_build_priv(dev);
return priv;
free_priv:
return 0;
out:
kfree(priv);
return NULL;
return rc;
}
struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port,
const char *name)
{
struct net_device *dev;
int rc;
dev = ipoib_alloc_netdev(hca, port, name);
if (IS_ERR(dev))
return dev;
rc = ipoib_intf_init(hca, port, name, dev);
if (rc) {
free_netdev(dev);
return ERR_PTR(rc);
}
/*
* Upon success the caller must ensure ipoib_intf_free is called or
* register_netdevice succeed'd and priv_destructor is set to
* ipoib_intf_free.
*/
return dev;
}
void ipoib_intf_free(struct net_device *dev)
......@@ -2382,16 +2388,19 @@ int ipoib_add_pkey_attr(struct net_device *dev)
static struct net_device *ipoib_add_port(const char *format,
struct ib_device *hca, u8 port)
{
struct rtnl_link_ops *ops = ipoib_get_link_ops();
struct rdma_netdev_alloc_params params;
struct ipoib_dev_priv *priv;
struct net_device *ndev;
int result;
priv = ipoib_intf_alloc(hca, port, format);
if (!priv) {
pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
return ERR_PTR(-ENOMEM);
ndev = ipoib_intf_alloc(hca, port, format);
if (IS_ERR(ndev)) {
pr_warn("%s, %d: ipoib_intf_alloc failed %ld\n", hca->name, port,
PTR_ERR(ndev));
return ndev;
}
ndev = priv->dev;
priv = ipoib_priv(ndev);
INIT_IB_EVENT_HANDLER(&priv->event_handler,
priv->ca, ipoib_event);
......@@ -2412,6 +2421,14 @@ static struct net_device *ipoib_add_port(const char *format,
return ERR_PTR(result);
}
if (hca->rdma_netdev_get_params) {
int rc = hca->rdma_netdev_get_params(hca, port,
RDMA_NETDEV_IPOIB,
&params);
if (!rc && ops->priv_size < params.sizeof_priv)
ops->priv_size = params.sizeof_priv;
}
/*
* We cannot set priv_destructor before register_netdev because we
* need priv to be always valid during the error flow to execute
......
......@@ -122,12 +122,26 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
} else
child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]);
err = ipoib_intf_init(ppriv->ca, ppriv->port, dev->name, dev);
if (err) {
ipoib_warn(ppriv, "failed to initialize pkey device\n");
return err;
}
err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
child_pkey, IPOIB_RTNL_CHILD);
if (err)
return err;
if (!err && data)
if (data) {
err = ipoib_changelink(dev, tb, data, extack);
return err;
if (err) {
unregister_netdevice(dev);
return err;
}
}
return 0;
}
static size_t ipoib_get_size(const struct net_device *dev)
......@@ -149,6 +163,11 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.fill_info = ipoib_fill_info,
};
struct rtnl_link_ops *ipoib_get_link_ops(void)
{
return &ipoib_link_ops;
}
int __init ipoib_netlink_init(void)
{
return rtnl_link_register(&ipoib_link_ops);
......
......@@ -85,7 +85,7 @@ static bool is_child_unique(struct ipoib_dev_priv *ppriv,
/*
* NOTE: If this function fails then the priv->dev will remain valid, however
* priv can have been freed and must not be touched by caller in the error
* priv will have been freed and must not be touched by caller in the error
* case.
*
* If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to
......@@ -100,6 +100,12 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
ASSERT_RTNL();
/*
* We do not need to touch priv if register_netdevice fails, so just
* always use this flow.
*/
ndev->priv_destructor = ipoib_intf_free;
/*
* Racing with unregister of the parent must be prevented by the
* caller.
......@@ -120,9 +126,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto out_early;
}
/* We do not need to touch priv if register_netdevice fails */
ndev->priv_destructor = ipoib_intf_free;
result = register_netdevice(ndev);
if (result) {
ipoib_warn(priv, "failed to initialize; error %i", result);
......@@ -182,12 +185,12 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
snprintf(intf_name, sizeof(intf_name), "%s.%04x",
ppriv->dev->name, pkey);
priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv) {
result = -ENOMEM;
ndev = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (IS_ERR(ndev)) {
result = PTR_ERR(ndev);
goto out;
}
ndev = priv->dev;
priv = ipoib_priv(ndev);
result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
......
......@@ -4198,4 +4198,11 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *));
int rdma_init_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
struct net_device *netdev);
#endif /* IB_VERBS_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment