I have been experimenting with netlink sockets and kernel modules, sending messages back and forth between a kernel module and a userspace program. I have looked at various examples and found a lot of variability (and ambiguity, and just plain erroneous info).

So there seem to be (at least) two ways to receive messages. The first and simplest, where netlink_kernel_create() is given the name of a handling function directly, is as follows:

Code:

//================================================

static void hello_nl_recv_msg (struct sk_buff	*skb)
{
   struct nlmsghdr   *nlh;
   int               pid;

   nlh = (struct nlmsghdr*)skb->data;
   printk (KERN_INFO "KTEST: ktest:%s() received msg %d: %s\n",
           __FUNCTION__, g_seq, (char*)nlmsg_data(nlh));

   pid = nlh->nlmsg_pid;      // pid of sending process
   msg_respond (pid);
}

//================================================

static int __init hello_init(void)
{
   printk ("KTEST: ktest module ('%s()') starting ...\n", __FUNCTION__);

   nl_sk = netlink_kernel_create (&init_net,
                                  NETLINK_USER,
                                  0,
                                  hello_nl_recv_msg,
                                  NULL,
                                  THIS_MODULE);
   if (!nl_sk)
   {
      printk (KERN_ALERT "KTEST: Error creating socket.\n");
      return -10;
   }


   return 0;
}
The second way is a little more involved, where the function passed to netlink_kernel_create() calls the handling function indirectly, by passing it to netlink_rcv_skb():

Code:

//=================================================

static int nl_rcv_msg (struct sk_buff  *skb,
                       struct nlmsghdr *nlh)
{
   int               pid;

   pid = nlh->nlmsg_pid;      // pid of sending process

   printk (KERN_INFO "KTEST: ktest:%s() received msg %d: %s\n",
           __FUNCTION__, g_seq, (char*)nlmsg_data(nlh));

   msg_respond (pid);
   return 0;
}

//=================================================

static void nl_msg_receive (struct sk_buff *skb)
{
   printk (KERN_INFO "KTEST: ktest:%s() received msg %d: %s\n",
           __FUNCTION__, g_seq,
           (char*)nlmsg_data((struct nlmsghdr*)skb->data));

   mutex_lock (&nl_mutex);
   netlink_rcv_skb (skb, &nl_rcv_msg);
   mutex_unlock (&nl_mutex);
}

//===================================================

static int __init hello_init(void)
{
   printk ("KTEST: ktest module ('%s()') starting ...\n", __FUNCTION__);

   nl_sk = netlink_kernel_create (&init_net,
                                  NETLINK_USER,
                                  0,
                                  nl_msg_receive,
                                  NULL,
                                  THIS_MODULE);
   if (!nl_sk)
   {
      printk (KERN_ALERT "KTEST: Error creating socket.\n");
      return -10;
   }

   return 0;
}
SO - I've noticed that with the first method, the userspace program, when setting up the netlink message to send, can set the nlmsg_flags field in nlmsghdr to 0, and the nlmsg_type field to NLMSG_DONE, as was indicated in all of the examples I looked at (regardless of the method used). HOWEVER the second (more involved) method does not work unless the userspace program sets nlmsg_flags to NLM_F_REQUEST, and nlmsg_type to NLMSG_MIN_TYPE+1 or greater.

It's really no big deal to set the flags to use the second method, as long as you know about it (it befuddled me for a few days until I broke down and read the kernel code).

So here is my question, to wiser people - when/why would one want to use one method over the other?

I am testing this on a Mint distro using v 3.2.0-2 of the kernel, running it as a virtual machine in Virtualbox on a Macbook Pro.