libmnl  1.0.4
nfct-daemon.c
1 /* A very simple skeleton code that implements a daemon that collects
2  * conntrack statistics from ctnetlink.
3  *
4  * This example is placed in the public domain.
5  */
6 #include <endian.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <unistd.h>
11 #include <time.h>
12 #include <arpa/inet.h>
13 #include <netinet/in.h>
14 #include <inttypes.h>
15 #include <errno.h>
16 
17 #include <libmnl/libmnl.h>
18 #include <linux/netlink.h>
19 #include <linux/netfilter/nfnetlink.h>
20 #include <linux/netfilter/nfnetlink_conntrack.h>
21 
22 #include <sys/queue.h>
23 
/*
 * Traffic counters accumulated for one source address.
 *
 * Objects of this type are linked into the global nstats_head list.
 * The address lives in an anonymous union, so 'ip' and 'ip6' share the
 * same storage; 'family' tells which of the two is meaningful.
 */
struct nstats {
	LIST_ENTRY(nstats)	list;	/* linkage into nstats_head */

	uint8_t			family;	/* AF_INET or AF_INET6, set by parse_ip() */

	union {
		struct in_addr	ip;	/* IPv4 source address */
		struct in6_addr	ip6;	/* IPv6 source address */
	};

	uint64_t		pkts;	/* accumulated packet count */
	uint64_t		bytes;	/* accumulated byte count */
};

/* Head of the list holding every statistics object collected so far. */
static LIST_HEAD(nstats_head, nstats) nstats_head;
37 
38 static int parse_counters_cb(const struct nlattr *attr, void *data)
39 {
40  const struct nlattr **tb = data;
41  int type = mnl_attr_get_type(attr);
42 
43  if (mnl_attr_type_valid(attr, CTA_COUNTERS_MAX) < 0)
44  return MNL_CB_OK;
45 
46  switch(type) {
47  case CTA_COUNTERS_PACKETS:
48  case CTA_COUNTERS_BYTES:
49  if (mnl_attr_validate(attr, MNL_TYPE_U64) < 0) {
50  perror("mnl_attr_validate");
51  return MNL_CB_ERROR;
52  }
53  break;
54  }
55  tb[type] = attr;
56  return MNL_CB_OK;
57 }
58 
59 static void parse_counters(const struct nlattr *nest, struct nstats *ns)
60 {
61  struct nlattr *tb[CTA_COUNTERS_MAX+1] = {};
62 
63  mnl_attr_parse_nested(nest, parse_counters_cb, tb);
64  if (tb[CTA_COUNTERS_PACKETS])
65  ns->pkts += be64toh(mnl_attr_get_u64(tb[CTA_COUNTERS_PACKETS]));
66 
67  if (tb[CTA_COUNTERS_BYTES])
68  ns->bytes += be64toh(mnl_attr_get_u64(tb[CTA_COUNTERS_BYTES]));
69 }
70 
71 static int parse_ip_cb(const struct nlattr *attr, void *data)
72 {
73  const struct nlattr **tb = data;
74  int type = mnl_attr_get_type(attr);
75 
76  if (mnl_attr_type_valid(attr, CTA_IP_MAX) < 0)
77  return MNL_CB_OK;
78 
79  switch(type) {
80  case CTA_IP_V4_SRC:
81  case CTA_IP_V4_DST:
82  if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) {
83  perror("mnl_attr_validate");
84  return MNL_CB_ERROR;
85  }
86  break;
87  case CTA_IP_V6_SRC:
88  case CTA_IP_V6_DST:
89  if (mnl_attr_validate2(attr, MNL_TYPE_BINARY,
90  sizeof(struct in6_addr)) < 0) {
91  perror("mnl_attr_validate2");
92  return MNL_CB_ERROR;
93  }
94  break;
95  }
96  tb[type] = attr;
97  return MNL_CB_OK;
98 }
99 
100 static void parse_ip(const struct nlattr *nest, struct nstats *ns)
101 {
102  struct nlattr *tb[CTA_IP_MAX+1] = {};
103 
104  mnl_attr_parse_nested(nest, parse_ip_cb, tb);
105  if (tb[CTA_IP_V4_SRC]) {
106  struct in_addr *in = mnl_attr_get_payload(tb[CTA_IP_V4_SRC]);
107  ns->ip = *in;
108  ns->family = AF_INET;
109  }
110  if (tb[CTA_IP_V6_SRC]) {
111  struct in6_addr *in = mnl_attr_get_payload(tb[CTA_IP_V6_SRC]);
112  ns->ip6 = *in;
113  ns->family = AF_INET6;
114  }
115 }
116 
117 static int parse_tuple_cb(const struct nlattr *attr, void *data)
118 {
119  const struct nlattr **tb = data;
120  int type = mnl_attr_get_type(attr);
121 
122  if (mnl_attr_type_valid(attr, CTA_TUPLE_MAX) < 0)
123  return MNL_CB_OK;
124 
125  switch(type) {
126  case CTA_TUPLE_IP:
127  if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0) {
128  perror("mnl_attr_validate");
129  return MNL_CB_ERROR;
130  }
131  break;
132  }
133  tb[type] = attr;
134  return MNL_CB_OK;
135 }
136 
137 static void parse_tuple(const struct nlattr *nest, struct nstats *ns)
138 {
139  struct nlattr *tb[CTA_TUPLE_MAX+1] = {};
140 
141  mnl_attr_parse_nested(nest, parse_tuple_cb, tb);
142  if (tb[CTA_TUPLE_IP])
143  parse_ip(tb[CTA_TUPLE_IP], ns);
144 }
145 
146 static int data_attr_cb(const struct nlattr *attr, void *data)
147 {
148  const struct nlattr **tb = data;
149  int type = mnl_attr_get_type(attr);
150 
151  if (mnl_attr_type_valid(attr, CTA_MAX) < 0)
152  return MNL_CB_OK;
153 
154  switch(type) {
155  case CTA_TUPLE_ORIG:
156  case CTA_COUNTERS_ORIG:
157  case CTA_COUNTERS_REPLY:
158  if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0) {
159  perror("mnl_attr_validate");
160  return MNL_CB_ERROR;
161  }
162  break;
163  }
164  tb[type] = attr;
165  return MNL_CB_OK;
166 }
167 
168 static int data_cb(const struct nlmsghdr *nlh, void *data)
169 {
170  struct nlattr *tb[CTA_MAX+1] = {};
171  struct nfgenmsg *nfg = mnl_nlmsg_get_payload(nlh);
172  struct nstats ns = {}, *cur, *new;
173 
174  mnl_attr_parse(nlh, sizeof(*nfg), data_attr_cb, tb);
175  if (tb[CTA_TUPLE_ORIG])
176  parse_tuple(tb[CTA_TUPLE_ORIG], &ns);
177 
178  if (tb[CTA_COUNTERS_ORIG])
179  parse_counters(tb[CTA_COUNTERS_ORIG], &ns);
180 
181  if (tb[CTA_COUNTERS_REPLY])
182  parse_counters(tb[CTA_COUNTERS_REPLY], &ns);
183 
184  /* Look up for existing statistics object ... */
185  LIST_FOREACH(cur, &nstats_head, list) {
186  if (memcmp(&ns.ip6, &cur->ip6, sizeof(struct in6_addr)) == 0) {
187  /* ... and sum counters */
188  cur->pkts += ns.pkts;
189  cur->bytes += ns.bytes;
190  return MNL_CB_OK;
191  }
192  }
193 
194  /* ... if it does not exist, add new stats object */
195  new = calloc(1, sizeof(struct nstats));
196  if (!new)
197  return MNL_CB_OK;
198 
199  new->family = ns.family;
200  new->ip6 = ns.ip6;
201  new->pkts = ns.pkts;
202  new->bytes = ns.bytes;
203 
204  LIST_INSERT_HEAD(&nstats_head, new, list);
205 
206  return MNL_CB_OK;
207 }
208 
209 static int handle(struct mnl_socket *nl)
210 {
211  char buf[MNL_SOCKET_BUFFER_SIZE];
212  int ret;
213 
214  ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
215  if (ret == -1) {
216  /* It only happens if NETLINK_NO_ENOBUFS is not set, it means
217  * we are leaking statistics.
218  */
219  if (errno == ENOBUFS) {
220  fprintf(stderr, "The daemon has hit ENOBUFS, you can "
221  "increase the size of your receiver "
222  "buffer to mitigate this or enable "
223  "reliable delivery.\n");
224  } else {
225  perror("mnl_socket_recvfrom");
226  }
227  return -1;
228  }
229 
230  ret = mnl_cb_run(buf, ret, 0, 0, data_cb, NULL);
231  if (ret == -1) {
232  perror("mnl_cb_run");
233  return -1;
234  } else if (ret <= MNL_CB_STOP)
235  return 0;
236 
237  return 0;
238 }
239 
240 int main(int argc, char *argv[])
241 {
242  struct mnl_socket *nl;
243  char buf[MNL_SOCKET_BUFFER_SIZE];
244  struct nlmsghdr *nlh;
245  struct nfgenmsg *nfh;
246  struct nstats *cur;
247  struct timeval tv = {};
248  int ret, secs, on = 1, buffersize = (1 << 22);
249 
250  if (argc != 2) {
251  printf("Usage: %s <poll-secs>\n", argv[0]);
252  exit(EXIT_FAILURE);
253  }
254  secs = atoi(argv[1]);
255 
256  LIST_INIT(&nstats_head);
257 
258  printf("Polling every %d seconds from kernel...\n", secs);
259 
260  /* Set high priority for this process, less chances to overrun
261  * the netlink receiver buffer since the scheduler gives this process
262  * more chances to run.
263  */
264  nice(-20);
265 
266  /* Open netlink socket to operate with netfilter */
267  nl = mnl_socket_open(NETLINK_NETFILTER);
268  if (nl == NULL) {
269  perror("mnl_socket_open");
270  exit(EXIT_FAILURE);
271  }
272 
273  /* Subscribe to destroy events to avoid leaking counters. The same
274  * socket is used to periodically atomically dump and reset counters.
275  */
276  if (mnl_socket_bind(nl, NF_NETLINK_CONNTRACK_DESTROY,
277  MNL_SOCKET_AUTOPID) < 0) {
278  perror("mnl_socket_bind");
279  exit(EXIT_FAILURE);
280  }
281 
282  /* Set netlink receiver buffer to 16 MBytes, to avoid packet drops */
283  setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_RCVBUFFORCE,
284  &buffersize, sizeof(socklen_t));
285 
286  /* The two tweaks below enable reliable event delivery, packets may
287  * be dropped if the netlink receiver buffer overruns. This happens ...
288  *
289  * a) if the kernel spams this user-space process until the receiver
290  * is filled.
291  *
292  * or:
293  *
294  * b) if the user-space process does not pull messages from the
295  * receiver buffer so often.
296  */
297  mnl_socket_setsockopt(nl, NETLINK_BROADCAST_ERROR, &on, sizeof(int));
298  mnl_socket_setsockopt(nl, NETLINK_NO_ENOBUFS, &on, sizeof(int));
299 
300  nlh = mnl_nlmsg_put_header(buf);
301  /* Counters are atomically zeroed in each dump */
302  nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) |
303  IPCTNL_MSG_CT_GET_CTRZERO;
304  nlh->nlmsg_flags = NLM_F_REQUEST|NLM_F_DUMP;
305 
306  nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
307  nfh->nfgen_family = AF_INET;
308  nfh->version = NFNETLINK_V0;
309  nfh->res_id = 0;
310 
311  /* Filter by mark: We only want to dump entries whose mark is zero */
312  mnl_attr_put_u32(nlh, CTA_MARK, htonl(0));
313  mnl_attr_put_u32(nlh, CTA_MARK_MASK, htonl(0xffffffff));
314 
315  while (1) {
316  int fd_max = mnl_socket_get_fd(nl);
317  fd_set readfds;
318 
319  /* Every N seconds ... */
320  if (tv.tv_sec == 0 && tv.tv_usec == 0) {
321  /* ... request a fresh dump of the table from kernel */
322  ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
323  if (ret == -1) {
324  perror("mnl_socket_sendto");
325  return -1;
326  }
327  tv.tv_sec = secs;
328  tv.tv_usec = 0;
329 
330  /* print the content of the list */
331  LIST_FOREACH(cur, &nstats_head, list) {
332  char out[INET6_ADDRSTRLEN];
333 
334  if (inet_ntop(cur->family, &cur->ip, out, sizeof(out)))
335  printf("src=%s ", out);
336 
337  printf("counters %"PRIu64" %"PRIu64"\n",
338  cur->pkts, cur->bytes);
339  }
340  }
341 
342  FD_ZERO(&readfds);
343  FD_SET(mnl_socket_get_fd(nl), &readfds);
344 
345  ret = select(fd_max+1, &readfds, NULL, NULL, &tv);
346  if (ret < 0) {
347  if (errno == EINTR)
348  continue;
349 
350  perror("select");
351  exit(EXIT_FAILURE);
352  }
353 
354  /* Handled event and periodic atomic-dump-and-reset messages */
355  if (FD_ISSET(mnl_socket_get_fd(nl), &readfds)) {
356  if (handle(nl) < 0)
357  return EXIT_FAILURE;
358  }
359  }
360 
361  mnl_socket_close(nl);
362 
363  return 0;
364 }