文章目录
这篇笔记记录了Netlink协议族的初始化,并对Netlink消息格式及对应的内核态API进行了详细解释。Netlink用户态编程麻烦的一点是用户态对Netlink消息的处理,因为内核只向用户态暴露了很少一部分API,所以建议用户态编程还是基于libnl,这样更方便一些。
文件 | 说明 |
---|---|
net/netlink/af_netlink.c | Netlink协议族实现文件 |
include/net/netlink.h | 仅内核态可见的Netlink协议族头文件 |
include/linux/netlink.h | 用户态也可见的Netlink协议族头文件 |
数据结构
协议对象: netlink_table
每个Netlink协议对应一个netlink_table对象,系统用nl_table数组保存所有的Netlink协议,数组的索引就是socket(2)中的protocol参数,数组长度为MAX_LINKS。
/*
 * Per-protocol state of the Netlink family. One object exists for each
 * Netlink protocol; all of them are kept in the nl_table array.
 */
struct netlink_table {
struct nl_pid_hash hash; // hash table keyed by pid; holds every bound socket of this protocol
struct hlist_head mc_list; // sockets listening for this protocol's multicast data
unsigned long *listeners; // bitmap of multicast groups; a set bit means the group has listeners
unsigned int nl_nonroot; // NL_NONROOT_* bits, tested by netlink_capable()
unsigned int groups; // number of multicast groups the protocol supports (at least 32)
struct mutex *cb_mutex; // mutex handed to netlink_kernel_create() on first registration
struct module *module; // module handed to netlink_kernel_create() on first registration
int registered; // non-zero once the protocol is registered; incremented on repeated registration
};
// Number of entries in nl_table; the index is the protocol argument of socket(2)
#define MAX_LINKS 32
// Array of MAX_LINKS protocol objects, allocated in netlink_proto_init()
static struct netlink_table *nl_table;
static DEFINE_RWLOCK(nl_table_lock); // see netlink_lock_table() and netlink_unlock_table()
static atomic_t nl_table_users = ATOMIC_INIT(0);
- nl_nonroot
影响该协议的组播权限。默认情况下,只有超级用户或者具有CAP_NET_ADMIN权限的用户才可以收发Netlink组播消息,如果协议实现指定了该字段,那么表示非root用户也可以具备组播消息收发权限,可取的值如下:
// Bits for netlink_table.nl_nonroot: allow non-root users to receive / send multicast messages
#define NL_NONROOT_RECV 0x1
#define NL_NONROOT_SEND 0x2
相关函数如下:
// 设置指定协议的nl_nonroot字段
void netlink_set_nonroot(int protocol, unsigned flag);
/*
 * Tell whether @sock may perform the operation described by @flag
 * (an NL_NONROOT_* bit). Permission is granted when the socket's protocol
 * has that bit set in nl_nonroot, or when the caller holds CAP_NET_ADMIN.
 *
 * Returns 1 when permitted, 0 otherwise.
 */
static inline int netlink_capable(struct socket *sock, unsigned int flag)
{
	unsigned int granted = nl_table[sock->sk->sk_protocol].nl_nonroot & flag;

	/* The protocol itself opened this operation to non-root users. */
	if (granted)
		return 1;

	/* Otherwise fall back to the capability check. */
	return capable(CAP_NET_ADMIN) ? 1 : 0;
}
协议族初始化
在Netlink协议族初始化时,就向系统注册了自己的socket创建接口,代码如下所示。
// Socket-family operations for PF_NETLINK; handed to sock_register() during family init
static struct net_proto_family netlink_family_ops = {
.family = PF_NETLINK,
.create = netlink_create, // socket creation function
.owner = THIS_MODULE, /* for consistency 8) */
};
/*
 * Initialise the Netlink protocol family: register the proto, allocate
 * nl_table plus one pid hash table per entry, register the PF_NETLINK
 * socket family and the per-net operations, then call rtnetlink_init().
 *
 * Returns 0 on success or the error from proto_register(); panics if
 * nl_table or any pid hash table cannot be allocated.
 */
static int __init netlink_proto_init(void)
{
struct sk_buff *dummy_skb;
int i;
unsigned long limit;
unsigned int order;
// register the netlink protocol socket information with the system
int err = proto_register(&netlink_proto, 0);
if (err != 0)
goto out;
// compile-time check that struct netlink_skb_parms fits into the skb control block
BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
// allocate the nl_table array that holds the netlink protocol objects
nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
if (!nl_table)
goto panic;
// from the amount of physical memory, derive the order that is stored
// below as max_shift of every protocol's pid hash table
if (num_physpages >= (128 * 1024))
limit = num_physpages >> (21 - PAGE_SHIFT);
else
limit = num_physpages >> (23 - PAGE_SHIFT);
order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
limit = (1UL << order) / sizeof(struct hlist_head);
order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
for (i = 0; i < MAX_LINKS; i++) {
struct nl_pid_hash *hash = &nl_table[i].hash;
// each pid hash starts out with room for a single table entry (shift 0, mask 0)
hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
if (!hash->table) {
// free the tables allocated for earlier entries, then the array itself
while (i-- > 0)
nl_pid_hash_free(nl_table[i].hash.table, 1 * sizeof(*hash->table));
kfree(nl_table);
goto panic;
}
hash->max_shift = order;
hash->shift = 0;
hash->mask = 0;
hash->rehash_time = jiffies;
}
// register the AF_NETLINK protocol family with the system
sock_register(&netlink_family_ops);
// per-network-namespace initialisation; per the original note this creates /proc/net/netlink
register_pernet_subsys(&netlink_net_ops);
/* The netlink device handler may be needed early. */
rtnetlink_init(); // Route Netlink protocol initialisation
out:
return err;
panic:
panic("netlink_init: Cannot allocate nl_table\n");
}
// run netlink_proto_init through the core_initcall mechanism
core_initcall(netlink_proto_init);
Netlink协议管理
如数据结构部分介绍,每个Netlink协议对应一个netlink_table对象,协议族将所有的协议对象保存在nl_table数组中。
Netlink协议注册: netlink_kernel_create()
具体的协议实现需要通过该接口向协议族注册自己。
@unit: 协议类型,对应socket(2)中的protocol参数
@groups: 支持的组播个数
@input: 协议实现提供的数据接收函数
/*
 * Register a Netlink protocol with the family by creating its kernel-side
 * socket.
 *
 * @net: network namespace the new socket is moved into
 * @unit: protocol number, i.e. the protocol argument of socket(2)
 * @groups: number of multicast groups; raised to 32 when smaller
 * @input: receive callback supplied by the protocol, may be NULL
 * @cb_mutex: passed to __netlink_create() and stored in nl_table[unit] on first registration
 * @module: stored in nl_table[unit] on first registration
 *
 * Returns the kernel socket, or NULL on any failure.
 */
struct sock *netlink_kernel_create(struct net *net, int unit, unsigned int groups,
void (*input)(struct sk_buff *skb), struct mutex *cb_mutex, struct module *module)
{
struct socket *sock;
struct sock *sk;
struct netlink_sock *nlk;
unsigned long *listeners = NULL;
BUG_ON(!nl_table);
// validate the protocol number
if (unit < 0 || unit >= MAX_LINKS)
return NULL;
// create a Netlink socket in kernel space
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
/*
 * We have to just have a reference on the net from sk, but don't
 * get_net it. Besides, we cannot get and then put the net here.
 * So we create one inside init_net and the move it to net.
 */
if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
goto out_sock_release_nosk;
sk = sock->sk;
sk_change_net(sk, net);
// allocate the listeners bitmap according to the number of multicast groups
if (groups < 32) // at least 32 multicast groups
groups = 32;
listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
if (!listeners)
goto out_sock_release;
sk->sk_data_ready = netlink_data_ready;
if (input) // remember the receive callback
nlk_sk(sk)->netlink_rcv = input;
// Insert the kernel socket into the protocol's pid hash table, which means
// the socket is now bound. Note that the pid argument is 0: this is why
// port id 0 stands for the kernel.
if (netlink_insert(sk, net, 0))
goto out_sock_release;
nlk = nlk_sk(sk);
nlk->flags |= NETLINK_KERNEL_SOCKET; // mark it as a kernel socket
// exclusive access to nl_table
netlink_table_grab();
if (!nl_table[unit].registered) {
// first call for this protocol: this is the registration
nl_table[unit].groups = groups;
nl_table[unit].listeners = listeners;
nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
nl_table[unit].registered = 1;
} else {
// repeated registration: keep the existing bitmap, only count the user
kfree(listeners);
nl_table[unit].registered++;
}
netlink_table_ungrab();
return sk;
out_sock_release:
kfree(listeners);
netlink_kernel_release(sk);
return NULL;
out_sock_release_nosk:
sock_release(sock);
return NULL;
}
和netlink_kernel_create()对应的,有netlink_kernel_release()函数用于执行去注册过程。
netlink_kernel_create()中还有一个很重要的逻辑:为注册的协议创建了一个内核态的socket,并将其加入到了该协议的pid哈希表中。由于没有执行过绑定操作,所以该socket的pid就是0,该pid代表的就是内核;当用户态向pid为0的协议发送消息时,匹配到的正是该socket。
Netlink消息
一个Netlink消息是由一个或多个消息单元组成的字节流,每个消息单元由nlmsghdr头及其后面的payload组成。如果一个字节流包含多个消息单元,那么它是一个多部消息,该多部消息的所有消息单元的nlmsg_flags必须包含NLM_F_MULTI标记,而且最后一个消息单元的nlmsg_type必须是NLMSG_DONE。
消息头
// Header that starts every Netlink message unit; the payload follows it
struct nlmsghdr
{
__u32 nlmsg_len; /* Length of message including header */
__u16 nlmsg_type; /* Message content */
__u16 nlmsg_flags; /* Additional flags */
__u32 nlmsg_seq; /* Sequence number */
__u32 nlmsg_pid; /* Sending process port ID */
};
- nlmsg_len: nlmsghdr加payload的长度,代表了一个消息单元的总长度;
- nlmsg_type: 消息类型,在协议内唯一标识一个Netlink消息;内核定义了几个标准的消息类型,其含义如下:
- NLMSG_NOOP:空消息,不清楚有什么使用场景;
- NLMSG_ERROR:表示错误消息,此时payload格式为nlmsgerr;
// Payload layout of an NLMSG_ERROR message
struct nlmsgerr
{
int error; // error code
struct nlmsghdr msg; // header of the Netlink message that caused this error
};
- NLMSG_DONE:标识一个多部消息的结束;
- nlmsg_seq: 每个消息都应该有一个唯一的标识;
- nlmsg_pid: 消息发送方的pid标识;
消息整体格式
在内核源码中有如下注释,清晰的表达了Netlink消息的格式:
/* ========================================================================
 *
 * Message Format:
 *    <--- nlmsg_total_size(payload)  --->
 *    <-- nlmsg_msg_size(payload) ->
 *   +----------+- - -+-------------+- - -+-------- - -
 *   | nlmsghdr | Pad |   Payload   | Pad | nlmsghdr
 *   +----------+- - -+-------------+- - -+-------- - -
 *   nlmsg_data(nlh)---^                   ^
 *   nlmsg_next(nlh)-----------------------+
 *
 * =========================================================================
 */
注:消息首部中的nlmsg_len相当于nlmsg_msg_size(),不包含消息末尾的Pad。
相关API
/** * nlmsg_msg_size - length of netlink message not including padding * @payload: length of message payload */
static inline int nlmsg_msg_size(int payload);
/** * nlmsg_total_size - length of netlink message including padding * @payload: length of message payload */
static inline int nlmsg_total_size(int payload);
/** * nlmsg_data - head of message payload * @nlh: netlink message header */
static inline void *nlmsg_data(const struct nlmsghdr *nlh);
/** * nlmsg_next - next netlink message in message stream * @nlh: netlink message header * @remaining: number of bytes remaining in message stream * * Returns the next netlink message in the message stream and * decrements remaining by the size of the current message. */
static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining);
消息payload格式
如下图,可以看出,payload的开头部分包含一个协议首部,该首部完全是由具体协议指定的,协议族框架代码并不会去解释该协议首部,payload部分完全由具体协议自己解释。
/* ========================================================================
 *
 * Payload Format:
 *    <---------------------- nlmsg_len(nlh) --------------------->
 *    <------ hdrlen ------>       <- nlmsg_attrlen(nlh, hdrlen) ->
 *   +----------------------+- - -+--------------------------------+
 *   |     Family Header    | Pad |           Attributes           |
 *   +----------------------+- - -+--------------------------------+
 *   nlmsg_attrdata(nlh, hdrlen)---^
 *
 * =========================================================================
 */
相关API
/** * nlmsg_len - length of message payload * @nlh: netlink message header */
static inline int nlmsg_len(const struct nlmsghdr *nlh);
/** * nlmsg_attrdata - head of attributes data * @nlh: netlink message header * @hdrlen: length of family specific header */
static inline struct nlattr *nlmsg_attrdata(const struct nlmsghdr *nlh,
int hdrlen);
/** * nlmsg_attrlen - length of attributes data * @nlh: netlink message header * @hdrlen: length of family specific header */
static inline int nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen);
消息属性格式
消息payload部分的核心是属性,每个payload可以包含多个属性,属性格式如下:
/* ========================================================================
 *
 * Attribute Format:
 *    <------- nla_total_size(payload) ------->
 *    <---- nla_attr_size(payload) ----->
 *   +----------+- - -+- - - - - - - - - +- - -+-------- - -
 *   |  Header  | Pad |     Payload      | Pad |  Header
 *   +----------+- - -+- - - - - - - - - +- - -+-------- - -
 *                     <- nla_len(nla) ->      ^
 *   nla_data(nla)----^                        |
 *   nla_next(nla)-----------------------------'
 *
 * =========================================================================
 */
其中,属性头的定义如下,其实就是TLV格式:
// Attribute header in TLV style: length and type here, the value (payload) follows
struct nlattr
{
__u16 nla_len;
__u16 nla_type;
};
相关API
/** * nla_total_size - total length of attribute including padding * @payload: length of payload */
static inline int nla_total_size(int payload);
/** * nla_attr_size - length of attribute not including padding * @payload: length of payload */
static inline int nla_attr_size(int payload);
/** * nla_data - head of payload * @nla: netlink attribute */
static inline void *nla_data(const struct nlattr *nla);
/** * nla_next - next netlink attribute in attribute stream * @nla: netlink attribute * @remaining: number of bytes remaining in attribute stream * * Returns the next netlink attribute in the attribute stream and * decrements remaining by the size of the current attribute. */
static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining);
内核其它消息API
消息的构造
通常使用nlmsg_new()分配一个新的skb来封装Netlink消息。
/** * nlmsg_new - Allocate a new netlink message * @payload: size of the message payload * @flags: the type of memory to allocate. * * Use NLMSG_DEFAULT_SIZE if the size of the payload isn't known * and a good default is needed. */
static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags);
之后使用nlmsg_put()填充该Netlink消息首部。
/** * nlmsg_put - Add a new netlink message to an skb * @skb: socket buffer to store message in * @pid: netlink process id, 指发送方 * @seq: sequence number of message * @type: message type * @payload: length of message payload * @flags: message flags * * Returns NULL if the tailroom of the skb is insufficient to store * the message header and payload. */
static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
int type, int payload, int flags);
在所有数据(包括payload)填充完毕后,使用nlmsg_end()填充首部的nlmsg_len字段,完成消息的封装。
/** * nlmsg_end - Finalize a netlink message * @skb: socket buffer the message is stored in * @nlh: netlink message header * * Corrects the netlink message header to include the appended * attributes. Only necessary if attributes have been added to * the message. * * Returns the total data length of the skb. */
static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh);
最后,消息的释放由nlmsg_free()完成。
/** * nlmsg_free - free a netlink message * @skb: socket buffer of netlink message */
static inline void nlmsg_free(struct sk_buff *skb);
消息的解析
/** * nlmsg_ok - check if the netlink message fits into the remaining bytes * @nlh: netlink message header * @remaining: number of bytes remaining in message stream */
static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining);
/** * nlmsg_next - next netlink message in message stream * @nlh: netlink message header * @remaining: number of bytes remaining in message stream * * Returns the next netlink message in the message stream and * decrements remaining by the size of the current message. */
static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining);
/** * nlmsg_parse - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * * See nla_parse() */
static inline int nlmsg_parse(struct nlmsghdr *nlh, int hdrlen,
struct nlattr *tb[], int maxtype,
const struct nla_policy *policy);
/** * nlmsg_find_attr - find a specific attribute in a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @attrtype: type of attribute to look for * * Returns the first attribute which matches the specified type. */
static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh,
int hdrlen, int attrtype);
/** * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @nlh: netlink message header * @hdrlen: length of family specific header * @rem: initialized to len, holds bytes currently remaining in stream */
#define nlmsg_for_each_attr(pos, nlh, hdrlen, rem);
/** * nlmsg_for_each_msg - iterate over a stream of messages * @pos: loop counter, set to current message * @head: head of message stream * @len: length of message stream * @rem: initialized to len, holds bytes currently remaining in stream */
#define nlmsg_for_each_msg(pos, head, len, rem);
消息的发送
单播消息发送接口为nlmsg_unicast()。
/** * nlmsg_unicast - unicast a netlink message * @sk: netlink socket to spread message to * @skb: netlink message as socket buffer * @pid: netlink pid of the destination socket */
static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid);
组播消息的发送接口为nlmsg_multicast()。
/** * nlmsg_multicast - multicast a netlink message * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer * @pid: own netlink pid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */
static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
u32 pid, unsigned int group, gfp_t flags);
消息属性相关API
/* =========================================================================
* Attribute Construction:
* nla_put(skb, type, len, data) add attribute to skb
*
* Attribute Construction for Basic Types:
* nla_put_u8(skb, type, value) add u8 attribute to skb
* nla_put_u16(skb, type, value) add u16 attribute to skb
* nla_put_u32(skb, type, value) add u32 attribute to skb
* nla_put_u64(skb, type, value) add u64 attribute to skb
* nla_put_string(skb, type, str) add string attribute to skb
* nla_put_flag(skb, type) add flag attribute to skb
* nla_put_msecs(skb, type, jiffies) add msecs attribute to skb
*
* Nested Attributes Construction:
* nla_nest_start(skb, type) start a nested attribute
* nla_nest_end(skb, nla) finalize a nested attribute
* nla_nest_cancel(skb, nla) cancel nested attribute construction
*
* Attribute Length Calculations:
* nla_attr_size(payload) length of attribute w/o padding
* nla_total_size(payload) length of attribute w/ padding
* nla_padlen(payload) length of padding
*
* Attribute Payload Access:
* nla_data(nla) head of attribute payload
* nla_len(nla) length of attribute payload
*
* Attribute Payload Access for Basic Types:
* nla_get_u8(nla) get payload for a u8 attribute
* nla_get_u16(nla) get payload for a u16 attribute
* nla_get_u32(nla) get payload for a u32 attribute
* nla_get_u64(nla) get payload for a u64 attribute
* nla_get_flag(nla) return 1 if flag is true
* nla_get_msecs(nla) get payload for a msecs attribute
*
* Attribute Misc:
* nla_memcpy(dest, nla, count) copy attribute into memory
* nla_memcmp(nla, data, size) compare attribute with memory area
* nla_strlcpy(dst, nla, size) copy attribute to a sized string
* nla_strcmp(nla, str) compare attribute with string
*
* Attribute Parsing:
* nla_ok(nla, remaining) does nla fit into remaining bytes?
* nla_next(nla, remaining) get next netlink attribute
* nla_validate() validate a stream of attributes
* nla_validate_nested() validate a stream of nested attributes
* nla_find() find attribute in stream of attributes
* nla_find_nested() find attribute in nested attributes
* nla_parse() parse and validate stream of attrs
* nla_parse_nested() parse nested attributes
* nla_for_each_attr() loop over all attributes
* nla_for_each_nested() loop over the nested attributes
*=========================================================================
*/
用户态消息宏
用户态常用的一些消息宏如下,靠这些宏是不足以解析属性部分的,所以用户态编程建议基于libnl库。
/*
 * User-space helpers for walking a buffer of Netlink messages.
 * They cover message headers and payload boundaries only; they do not
 * parse attributes.
 */

/* Netlink lengths are rounded up to a multiple of this many bytes. */
#define NLMSG_ALIGNTO 4
/* Round len up to the next multiple of NLMSG_ALIGNTO. */
#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
/* Aligned size of the Netlink message header. */
#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
/* Value to store in nlmsg_len for a payload of len bytes (header + payload, no trailing pad). */
#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
/* Total space one message occupies: header + payload + alignment padding. */
#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
/* Pointer to the payload of the message whose header is nlh. */
#define NLMSG_DATA(nlh) ((void*)(((char*)(nlh)) + NLMSG_LENGTH(0)))
/*
 * Step to the next message of a multipart message. len must be an lvalue
 * holding the bytes left in the buffer; it is decremented by the aligned
 * size of the current message. The caller has to make sure (NLMSG_OK)
 * that another message actually follows.
 */
#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
	(struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len)))
/*
 * True when the len remaining bytes hold a complete header and the length
 * announced in nlmsg_len is both plausible and within those len bytes.
 */
#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
	(nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
	(nlh)->nlmsg_len <= (len))
/*
 * Payload bytes that remain after a leading header of len bytes
 * (len is the extra header length; pass 0 for the whole payload).
 */
#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
今天的文章Netlink协议族分享到此就结束了,感谢您的阅读。
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
如需转载请保留出处:https://bianchenghao.cn/12638.html