//linux_netns.c
/* SPDX-License-Identifier: GPL-2.0 */
#define _ATFILE_SOURCE
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/inotify.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <sched.h>
#include <fcntl.h>
#include <dirent.h>
#include <errno.h>
#include <time.h>
#include <unistd.h>
#include <ctype.h>
#include <linux/limits.h>
#include <stdbool.h>
#include <linux/net_namespace.h>
#include "linux_netns.h"
#include <stdlib.h>
/*
 * NOTE(review): _GNU_SOURCE is defined here, after every #include above, so
 * it presumably comes too late to influence what those headers declare.
 * The hand-written unshare() prototype below looks like a workaround for
 * that -- confirm, and consider moving the define to the top of the file.
 */
#define _GNU_SOURCE
/* Local prototype for unshare(), which netns_add() calls. */
int unshare(int flags);
/*
 * Netlink handle used by iplink_set_if_to_ns(), which opens and closes it
 * around each request. fd == -1 means "not open" (see rtnl_close()).
 */
struct rtnl_handle rth = { .fd = -1 };
/* Receive buffer size in bytes passed to SO_RCVBUF by rtnl_open_byproto(). */
int rcvbuf = 1024 * 1024;
/*
 * Open a network-namespace file read-only and return its descriptor.
 *
 * nsname selects the file to open:
 *   - NULL or ""        -> /proc/1/ns/net
 *   - contains a '/'    -> used verbatim as a path
 *   - anything else     -> /var/run/netns/<nsname>
 *
 * Returns the descriptor (the caller must close() it) or -1 on failure.
 * On failure errno describes the problem; it is ENAMETOOLONG when the
 * resulting path does not fit in PATH_MAX bytes.
 */
int
rtnl_open_netns(char *nsname)
{
	char path[PATH_MAX];
	int written;
	int fd;

	/* Bounded formatting: a long caller-supplied name must not overflow. */
	if (!nsname || nsname[0] == '\0')
		written = snprintf(path, sizeof(path), "%s", "/proc/1/ns/net");
	else if (strchr(nsname, '/') != NULL)
		written = snprintf(path, sizeof(path), "%s", nsname);
	else
		written = snprintf(path, sizeof(path), "/var/run/netns/%s", nsname);

	if (written < 0 || (size_t)written >= sizeof(path)) {
		fprintf(stderr, "netns path for \"%s\" is too long\n",
			nsname ? nsname : "");
		errno = ENAMETOOLONG;
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		/* Keep errno intact: callers print strerror(errno) themselves. */
		int saved_errno = errno;

		fprintf(stderr, "open stream %s: %s\n", path, strerror(saved_errno));
		errno = saved_errno;
		return -1;
	}
	return fd;
}
/*
 * Resolve a namespace name to an open descriptor.
 *
 * The literal name "default" is replaced by an empty name before calling
 * rtnl_open_netns(); every other name is passed through unchanged.
 * Returns whatever rtnl_open_netns() returns. name must not be NULL.
 */
static int netns_get_fd(char *name)
{
	char empty_name[1] = { '\0' };
	char *target = name;

	if (strcmp(name, "default") == 0)
		target = empty_name;

	return rtnl_open_netns(target);
}
/*
 * Extended-ACK reporting stub (no libmnl support in this port): nothing
 * is parsed or printed and 0 is always returned.
 */
static int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn)
{
	(void)nlh;
	(void)errfn;

	return 0;
}
/*
 * Append one attribute to the netlink message n.
 *
 * n      - message being built; nlmsg_len is advanced past the new attribute
 * maxlen - total size in bytes of the buffer that holds n
 * type   - attribute type stored in rta_type
 * data   - payload to copy (not read when alen is 0)
 * alen   - payload length in bytes
 *
 * Returns 0 on success, or -1 (after printing a diagnostic) when the
 * aligned message plus the aligned attribute would exceed maxlen.
 */
static int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
		     int alen)
{
	int attr_len = RTA_LENGTH(alen);
	unsigned int grown_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(attr_len);
	struct rtattr *attr;

	if (grown_len > maxlen) {
		fprintf(stderr,
			"addattr_l ERROR: message exceeded bound of %d\n",
			maxlen);
		return -1;
	}

	/* The new attribute starts at the aligned end of the current message. */
	attr = NLMSG_TAIL(n);
	attr->rta_type = type;
	attr->rta_len = attr_len;
	if (alen != 0)
		memcpy(RTA_DATA(attr), data, alen);

	n->nlmsg_len = grown_len;
	return 0;
}
/*
 * Report a netlink error reply on stderr.
 *
 * nl_dump_ext_ack() gets the first chance to report it; when it returns
 * zero the generic "RTNETLINK answers" line is printed, using the negated
 * err->error as an errno value.
 */
static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err,
			    nl_ext_ack_fn_t errfn)
{
	const int handled = nl_dump_ext_ack(h, errfn);

	if (!handled)
		fprintf(stderr, "RTNETLINK answers: %s\n",
			strerror(-err->error));
}
/*
 * recvmsg() wrapper that retries while the call fails with EINTR or EAGAIN.
 *
 * Returns the number of bytes reported by recvmsg() when it is positive,
 * -ENODATA when recvmsg() returns 0, or the negated errno of the failing
 * call. A diagnostic is printed on stderr in both failure cases.
 */
static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags)
{
	int len;

	do {
		len = recvmsg(fd, msg, flags);
	} while (len < 0 && (errno == EINTR || errno == EAGAIN));

	if (len < 0) {
		/* Capture errno once: fprintf() is allowed to modify it. */
		const int err = errno;

		fprintf(stderr, "netlink receive error %s (%d)\n",
			strerror(err), err);
		return -err;
	}
	if (len == 0) {
		fprintf(stderr, "EOF on netlink\n");
		return -ENODATA;
	}
	return len;
}
/*
 * Receive one message from fd into a freshly malloc()ed buffer.
 *
 * The first __rtnl_recvmsg() call uses an empty iovec with
 * MSG_PEEK | MSG_TRUNC, and its return value is used as the size of the
 * buffer for the second, real receive. msg->msg_iov[0] is overwritten.
 *
 * Returns the length reported by the second receive. On success, when
 * answer is non-NULL, *answer takes ownership of the buffer (the caller
 * frees it); otherwise the buffer is freed here. On failure a negative
 * value is returned and nothing is handed out.
 */
static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
{
	struct iovec *vec = msg->msg_iov;
	char *payload;
	int nbytes;

	/* Size probe: nothing is copied, the message stays queued. */
	vec->iov_base = NULL;
	vec->iov_len = 0;
	nbytes = __rtnl_recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC);
	if (nbytes < 0)
		return nbytes;

	payload = malloc(nbytes);
	if (payload == NULL) {
		fprintf(stderr, "malloc error: not enough buffer\n");
		return -ENOMEM;
	}

	vec->iov_base = payload;
	vec->iov_len = nbytes;
	nbytes = __rtnl_recvmsg(fd, msg, 0);
	if (nbytes < 0 || answer == NULL) {
		free(payload);
		return nbytes;
	}

	*answer = payload;
	return nbytes;
}
/*
 * Send the netlink request n on rtnl and wait for the matching reply.
 *
 * rtnl          - open handle; its seq counter is incremented and stamped
 *                 into the request
 * n             - request to send; nlmsg_seq is overwritten, and NLM_F_ACK
 *                 is added to nlmsg_flags when answer is NULL
 * answer        - when non-NULL, receives a pointer to the start of the
 *                 malloc()ed receive buffer that contains the reply (the
 *                 buffer comes from rtnl_recvmsg(); the caller presumably
 *                 has to free() it)
 * show_rtnl_err - print error replies via rtnl_talk_error() (never done
 *                 when rtnl->proto is NETLINK_SOCK_DIAG)
 * errfn         - forwarded to nl_dump_ext_ack()/rtnl_talk_error()
 *
 * Returns 0 on success; -1 when sending fails, when a message is
 * truncated, or when the reply is an error (errno is then set to the
 * negated netlink error code); or the negative value returned by
 * rtnl_recvmsg(). Several protocol violations terminate the process
 * with exit(1).
 */
static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
struct nlmsghdr **answer,
bool show_rtnl_err, nl_ext_ack_fn_t errfn)
{
int status;
unsigned int seq;
struct nlmsghdr *h;
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
struct iovec iov = {
.iov_base = n,
.iov_len = n->nlmsg_len
};
struct msghdr msg = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
.msg_iov = &iov,
.msg_iovlen = 1,
};
char *buf;
/* Tag the request so that replies can be matched by sequence number. */
n->nlmsg_seq = seq = ++rtnl->seq;
/* Without an answer buffer the only expected reply is an ACK. */
if (answer == NULL)
n->nlmsg_flags |= NLM_F_ACK;
status = sendmsg(rtnl->fd, &msg, 0);
if (status < 0) {
perror("Cannot talk to rtnetlink");
return -1;
}
/* Keep receiving until a message matching this request settles the call. */
while (1) {
/* rtnl_recvmsg() rewrites iov and hands back a malloc()ed buffer. */
status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
if (status < 0)
return status;
if (msg.msg_namelen != sizeof(nladdr)) {
fprintf(stderr,
"sender address length == %d\n",
msg.msg_namelen);
exit(1);
}
/* Walk every netlink message packed into this buffer. */
for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) {
int len = h->nlmsg_len;
int l = len - sizeof(*h);
/* Header claims less than a header or more than what was received. */
if (l < 0 || len > status) {
if (msg.msg_flags & MSG_TRUNC) {
fprintf(stderr, "Truncated message\n");
free(buf);
return -1;
}
fprintf(stderr,
"!!!malformed message: len=%d\n",
len);
exit(1);
}
/* Ignore messages with a non-zero sender pid, or not addressed to our pid/seq. */
if (nladdr.nl_pid != 0 ||
h->nlmsg_pid != rtnl->local.nl_pid ||
h->nlmsg_seq != seq) {
/* Don't forget to skip that message. */
status -= NLMSG_ALIGN(len);
h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
continue;
}
if (h->nlmsg_type == NLMSG_ERROR) {
struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
if (l < sizeof(struct nlmsgerr)) {
/* Too short to hold a nlmsgerr; falls through to the failure path below. */
fprintf(stderr, "ERROR truncated\n");
} else if (!err->error) {
/* error == 0 is a plain ACK: success. */
/* check messages from kernel */
nl_dump_ext_ack(h, errfn);
/* Ownership of buf moves to the caller when answer is set. */
if (answer)
*answer = (struct nlmsghdr *)buf;
else
free(buf);
return 0;
}
if (rtnl->proto != NETLINK_SOCK_DIAG &&
show_rtnl_err)
rtnl_talk_error(h, err, errfn);
/* Netlink carries negated errno values. */
errno = -err->error;
free(buf);
return -1;
}
/* Any other matching message is the answer, if one was requested. */
if (answer) {
*answer = (struct nlmsghdr *)buf;
return 0;
}
fprintf(stderr, "Unexpected reply!!!\n");
status -= NLMSG_ALIGN(len);
h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
}
/* Nothing in this buffer settled the request; drop it and read again. */
free(buf);
if (msg.msg_flags & MSG_TRUNC) {
fprintf(stderr, "Message truncated\n");
continue;
}
/* Bytes left over that are too few to form a header. */
if (status) {
fprintf(stderr, "!!!Remnant of size %d\n", status);
exit(1);
}
}
}
/*
 * Convenience wrapper around __rtnl_talk(): error replies are printed
 * and no extended-ACK callback is supplied. Same return values.
 */
static int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
		     struct nlmsghdr **answer)
{
	const bool report_errors = true;

	return __rtnl_talk(rtnl, n, answer, report_errors, NULL);
}
/*
 * Open and bind a netlink socket for the given protocol.
 *
 * rth           - handle to initialise; it is zeroed first
 * subscriptions - multicast group mask stored in the bound address
 * protocol      - netlink protocol passed to socket()
 *
 * The socket gets a 32768-byte send buffer and a receive buffer of rcvbuf
 * bytes; NETLINK_EXT_ACK is requested on a best-effort basis. rth->seq is
 * seeded from time(NULL).
 *
 * Returns 0 on success. On failure a diagnostic is printed, the socket
 * (if it was created) is closed again, rth->fd is left at -1 and -1 is
 * returned.
 */
static int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
			     int protocol)
{
	socklen_t addr_len;
	int sndbuf = 32768;
	int one = 1;

	memset(rth, 0, sizeof(*rth));
	rth->proto = protocol;
	rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
	if (rth->fd < 0) {
		perror("Cannot open netlink socket");
		return -1;
	}
	if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF,
		       &sndbuf, sizeof(sndbuf)) < 0) {
		perror("SO_SNDBUF");
		goto err_close;
	}
	if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF,
		       &rcvbuf, sizeof(rcvbuf)) < 0) {
		perror("SO_RCVBUF");
		goto err_close;
	}
	/* Older kernels may not support extended ACK reporting; ignore failure. */
	setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK,
		   &one, sizeof(one));

	memset(&rth->local, 0, sizeof(rth->local));
	rth->local.nl_family = AF_NETLINK;
	rth->local.nl_groups = subscriptions;
	if (bind(rth->fd, (struct sockaddr *)&rth->local,
		 sizeof(rth->local)) < 0) {
		perror("Cannot bind netlink socket");
		goto err_close;
	}

	/* Read back the bound address; __rtnl_talk() matches replies on nl_pid. */
	addr_len = sizeof(rth->local);
	if (getsockname(rth->fd, (struct sockaddr *)&rth->local,
			&addr_len) < 0) {
		perror("Cannot getsockname");
		goto err_close;
	}
	if (addr_len != sizeof(rth->local)) {
		fprintf(stderr, "Wrong address length %d\n", (int)addr_len);
		goto err_close;
	}
	if (rth->local.nl_family != AF_NETLINK) {
		fprintf(stderr, "Wrong address family %d\n",
			rth->local.nl_family);
		goto err_close;
	}
	rth->seq = time(NULL);
	return 0;

err_close:
	/* Do not leak the descriptor when setup fails half-way. */
	close(rth->fd);
	rth->fd = -1;
	return -1;
}
/*
 * Open rth as a NETLINK_ROUTE socket with the given group subscriptions.
 * Returns the result of rtnl_open_byproto().
 */
static int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
{
	const int protocol = NETLINK_ROUTE;

	return rtnl_open_byproto(rth, subscriptions, protocol);
}
/*
 * Close the handle's socket if it is open and mark the handle as closed
 * (fd == -1). Does nothing when fd is already negative.
 */
static void rtnl_close(struct rtnl_handle *rth)
{
	if (rth->fd < 0)
		return;

	close(rth->fd);
	rth->fd = -1;
}
/*
 * Move the interface with index if_index into the namespace nsname.
 *
 * An RTM_NEWLINK request carrying an IFLA_NET_NS_FD attribute (the open
 * descriptor of the target namespace) is sent over the file-level rth
 * handle, which is opened and closed inside this call.
 *
 * Returns 0 on success, -1 when nsname is NULL, when the netlink socket
 * or the namespace cannot be opened or the attribute cannot be added, and
 * -2 when the netlink request itself fails. Every descriptor opened here
 * is closed again on all paths.
 */
int iplink_set_if_to_ns(int if_index, char * nsname)
{
	struct iplink_req req = {
		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
		.n.nlmsg_flags = NLM_F_REQUEST,
		.n.nlmsg_type = RTM_NEWLINK,
		.i.ifi_family = AF_UNSPEC,
		.i.ifi_index = if_index,
	};
	int netns;
	int saved_errno;
	int rc = -1;

	if (nsname == NULL) {
		fprintf(stderr, "No netns name specified\n");
		return -1;
	}
	if (rtnl_open(&rth, 0) < 0)
		return -1;

	netns = netns_get_fd(nsname);
	if (netns < 0) {
		/* Without a target namespace the request would be a no-op. */
		fprintf(stderr, "Cannot find netns \"%s\"\n", nsname);
		goto out_close_rtnl;
	}
	if (addattr_l(&req.n, sizeof(req), IFLA_NET_NS_FD,
		      &netns, sizeof(netns)) < 0)
		goto out_close_netns;

	rc = (rtnl_talk(&rth, &req.n, NULL) < 0) ? -2 : 0;

out_close_netns:
	/* rtnl_talk() leaves the netlink error in errno; keep it for the caller. */
	saved_errno = errno;
	close(netns);
	errno = saved_errno;
out_close_rtnl:
	saved_errno = errno;
	rtnl_close(&rth);
	errno = saved_errno;
	return rc;
}
/****************************************ns add/del*/
/*
 * Remove the namespace file NETNS_RUN_DIR/<nsname>.
 *
 * The file is first lazily unmounted (the umount2() result is ignored)
 * and then unlinked. Returns 0 on success, or -1 after printing a
 * diagnostic when unlink() fails.
 */
static int on_netns_del(char *nsname)
{
	char target[PATH_MAX];

	snprintf(target, sizeof(target), "%s/%s", NETNS_RUN_DIR, nsname);
	umount2(target, MNT_DETACH);
	if (unlink(target) >= 0)
		return 0;

	fprintf(stderr, "Cannot remove namespace file \"%s\": %s\n",
		target, strerror(errno));
	return -1;
}
/*
 * Delete the named network namespace file.
 *
 * Returns -1 (with a message on stderr) when nsname is NULL, otherwise
 * the result of on_netns_del().
 */
int netns_delete(char *nsname)
{
	if (nsname != NULL)
		return on_netns_del(nsname);

	fprintf(stderr, "No netns name specified\n");
	return -1;
}
/*
 * Make sure NETNS_RUN_DIR exists, creating it with mode rwxr-xr-x.
 *
 * An already existing directory (EEXIST) counts as success. Returns 0 on
 * success, or -1 after printing a diagnostic for any other mkdir() error.
 */
static int create_netns_dir(void)
{
	const mode_t dir_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;

	if (mkdir(NETNS_RUN_DIR, dir_mode) == 0 || errno == EEXIST)
		return 0;

	fprintf(stderr, "mkdir %s failed: %s\n",
		NETNS_RUN_DIR, strerror(errno));
	return -1;
}
/*
 * Create a named network namespace.
 *
 * Steps, in order:
 *   1. make sure NETNS_RUN_DIR exists;
 *   2. mark NETNS_RUN_DIR as a shared, recursive mount, first turning it
 *      into a mount point by bind-mounting it onto itself when the kernel
 *      answers EINVAL;
 *   3. create the empty file NETNS_RUN_DIR/<nsname> (fails if it exists);
 *   4. unshare(CLONE_NEWNET);
 *   5. bind-mount /proc/self/ns/net onto that file.
 *
 * NOTE(review): unshare() acts on the calling process, so after a
 * successful call the caller is presumably left inside the new namespace
 * -- confirm this is what library users expect.
 *
 * Returns 0 on success and -1 on failure. When step 4 or 5 fails the
 * namespace file is removed again via netns_delete().
 */
int netns_add(char *nsname)
{
	char ns_file[PATH_MAX];
	bool bind_mounted = false;
	int fd;

	if (nsname == NULL) {
		fprintf(stderr, "No netns name specified\n");
		return -1;
	}
	snprintf(ns_file, sizeof(ns_file), "%s/%s", NETNS_RUN_DIR, nsname);

	if (create_netns_dir() != 0)
		return -1;

	/*
	 * Sharing the mount lets namespace-file mounts propagate between
	 * mount namespaces. The loop body runs at most once successfully:
	 * a second failure, or any error other than EINVAL, is fatal.
	 */
	while (mount("", NETNS_RUN_DIR, "none", MS_SHARED | MS_REC, NULL) != 0) {
		if (bind_mounted || errno != EINVAL) {
			fprintf(stderr, "mount --make-shared %s failed: %s\n",
				NETNS_RUN_DIR, strerror(errno));
			return -1;
		}
		/* EINVAL: the directory is not a mount point yet; make it one. */
		if (mount(NETNS_RUN_DIR, NETNS_RUN_DIR, "none",
			  MS_BIND | MS_REC, NULL) != 0) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				NETNS_RUN_DIR, NETNS_RUN_DIR, strerror(errno));
			return -1;
		}
		bind_mounted = true;
	}

	/* The empty file is only a mount target; O_EXCL rejects duplicates. */
	fd = open(ns_file, O_RDONLY | O_CREAT | O_EXCL, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot create namespace file \"%s\": %s\n",
			ns_file, strerror(errno));
		return -1;
	}
	close(fd);

	if (unshare(CLONE_NEWNET) < 0) {
		fprintf(stderr, "Failed to create a new network namespace \"%s\": %s\n",
			nsname, strerror(errno));
		netns_delete(nsname);
		return -1;
	}

	/* The bind mount is done last so the file only "appears" once usable. */
	if (mount("/proc/self/ns/net", ns_file, "none", MS_BIND, NULL) < 0) {
		fprintf(stderr, "Bind /proc/self/ns/net -> %s failed: %s\n",
			ns_file, strerror(errno));
		netns_delete(nsname);
		return -1;
	}
	return 0;
}
/*
 * Move the calling process into the network namespace called name.
 *
 * The namespace is opened through netns_get_fd() (so "default" is
 * accepted) and joined with setns(); the descriptor is closed again in
 * every case. Returns 0 on success, or -1 after printing a diagnostic.
 */
int netns_switch(char *name)
{
	int rc = 0;
	int nsfd = netns_get_fd(name);

	if (nsfd < 0) {
		fprintf(stderr, "Cannot open network namespace \"%s\": %s\n",
			name, strerror(errno));
		return -1;
	}

	if (setns(nsfd, CLONE_NEWNET) < 0) {
		fprintf(stderr, "setting the network namespace \"%s\" failed: %s\n",
			name, strerror(errno));
		rc = -1;
	}

	close(nsfd);
	return rc;
}
//linux_netns.h
#ifndef LINUX_NETNS_H_
#define LINUX_NETNS_H_
#include <linux/kernel.h>
#include <linux/socket.h> /* for __kernel_sa_family_t */
#include <linux/types.h>
#include <sys/socket.h>
#include <linux/rtnetlink.h>
#include <linux/netlink.h>
#include <net/if.h>
#include <sched.h>
#include <sys/mount.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <errno.h>
/* Directory that holds the named network-namespace files. */
#define NETNS_RUN_DIR "/var/run/netns"
#define DEFAULT_NETNS_RUN_DIR "/proc/self/ns/net"
#define NETNS_ETC_DIR "/etc/netns"
#ifndef CLONE_NEWNET
#define CLONE_NEWNET 0x40000000 /* New network namespace (lo, device, names sockets, etc) */
#endif
/* Netlink message header alignment and length helpers. */
#define NLMSG_ALIGNTO 4U
#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
/* Macros to handle rtattributes */
#define RTA_ALIGNTO 4U
#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) )
#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \
			 (rta)->rta_len >= sizeof(struct rtattr) && \
			 (rta)->rta_len <= (len))
#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \
			       (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len))
#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len))
#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
/*
 * First byte after the (aligned) end of message nmsg, typed as an
 * attribute pointer. The arithmetic is done on char * because arithmetic
 * on void * is a compiler extension, not standard C.
 */
#define NLMSG_TAIL(nmsg) \
	((struct rtattr *) (((char *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
/* Flags values */
#define NLM_F_REQUEST 0x01 /* It is request message. */
#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */
#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */
#define NLM_F_ECHO 0x08 /* Echo this request */
#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */
#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */
#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
/* Bit values for rtnl_handle.flags (not referenced by the code visible here). */
#define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01
#define RTNL_HANDLE_F_SUPPRESS_NLERR 0x02
/* State of one netlink socket as used by the rtnl_* helpers. */
struct rtnl_handle {
	int fd;				/* socket descriptor; -1 when closed */
	struct sockaddr_nl local;	/* bound address, read back with getsockname() */
	struct sockaddr_nl peer;	/* not used by the code visible here */
	__u32 seq;			/* sequence number of the last request sent */
	__u32 dump;			/* not used by the code visible here */
	int proto;			/* netlink protocol the socket was opened with */
	FILE *dump_fp;			/* not used by the code visible here */
	int flags;			/* RTNL_HANDLE_F_* bits */
};
/*
 * Fallback definitions for constants that the system headers may not
 * provide; each one is only defined when it is still missing.
 */
#if !defined(CLONE_NEWNET)
#define CLONE_NEWNET 0x40000000 /* unshare()/setns() flag: network namespace */
#endif
#if !defined(MNT_DETACH)
#define MNT_DETACH 0x2 /* umount2() flag: lazy unmount */
#endif
#if !defined(MS_REC)
#define MS_REC (1 << 14) /* mount() flag: apply recursively */
#endif
#if !defined(MS_SLAVE)
#define MS_SLAVE (1 << 19) /* mount() propagation flag */
#endif
#if !defined(MS_SHARED)
#define MS_SHARED (1 << 20) /* mount() propagation flag */
#endif
/*
 * Disabled fallback for setns(): the "#if 0" below keeps this whole block
 * out of the build. When enabled and HAVE_SETNS is not defined, it issues
 * the raw __NR_setns system call, or fails with ENOSYS when the syscall
 * number is unknown.
 */
#if 0
#ifndef HAVE_SETNS
static inline int setns(int fd, int nstype)
{
#ifdef __NR_setns
return syscall(__NR_setns, fd, nstype);
#else
errno = ENOSYS;
return -1;
#endif
}
#endif /* HAVE_SETNS */
#endif
/*
 * Request buffer for link messages: the netlink header, the ifinfomsg
 * payload, and 1024 bytes of room for attributes appended by addattr_l().
 */
struct iplink_req {
struct nlmsghdr n;
struct ifinfomsg i;
char buf[1024];
};
/*
 * Callback type for extended-ACK error reporting. It takes an error
 * message, an offset and the inner netlink header.
 * NOTE(review): nothing visible in this port ever calls such a callback.
 */
typedef int (*nl_ext_ack_fn_t)(const char *errmsg, __u32 off,
const struct nlmsghdr *inner_nlh);
/* Create the named network namespace under NETNS_RUN_DIR; 0 on success, -1 on failure. */
int netns_add(char *nsname);
/* Unmount and remove the named namespace file; 0 on success, -1 on failure. */
int netns_delete(char *nsname);
/* Move interface if_index into namespace nsname; 0 on success, negative on failure. */
int iplink_set_if_to_ns(int if_index, char * nsname);
/* Move the calling process into the named namespace; 0 on success, -1 on failure. */
int netns_switch(char *netns);
/* Open a namespace by name or path; returns a descriptor to close(), or -1. */
int rtnl_open_netns(char *nsname);
#endif /*LINUX_NETNS_H_*/
//main.c
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include "linux_netns.h"
/* Print msg followed by the errno text (perror) and exit with EXIT_FAILURE. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE);\
} while (0)
/* Program started by the -e option once the namespace has been joined. */
char *exec = "/bin/bash";
/*
 * Command-line front end for the netns helpers.
 *
 *   -a <name>  create the namespace
 *   -d <name>  delete the namespace
 *   -e <name>  join the namespace and exec the program named by `exec`
 *   -s <name>  switch this process into the namespace, then sleep 100 s
 *              so the namespace of the PID can be inspected from outside
 *
 * Options are processed in order. Exits with EXIT_FAILURE when too few
 * arguments are given or when an add/delete/switch operation fails;
 * an unrecognised option ends processing with status 0.
 */
int
main(int argc, char *argv[])
{
	static const char optstring[] = "a:d:e:s:";
	/* execvp() requires an argument vector terminated by a NULL pointer. */
	char *shell_argv[] = { exec, NULL };
	int status = EXIT_SUCCESS;
	int fd;
	int opt;
	int pid;

	if (argc < 3) {
		fprintf(stderr, "Usage: %s -a|-d|-e|-s <ns_name>\n", argv[0]);
		exit(EXIT_FAILURE);
	}

	while ((opt = getopt(argc, argv, optstring)) != -1) {
		switch (opt) {
		case 'a':
			printf("option: -%c, optarg: %s\r\n", opt, optarg);
			if (netns_add(optarg) < 0)
				status = EXIT_FAILURE;
			break;
		case 'd':
			printf("option: -%c, optarg: %s\r\n", opt, optarg);
			if (netns_delete(optarg) < 0)
				status = EXIT_FAILURE;
			break;
		case 'e':
			printf("option: -%c, optarg: %s\r\n", opt, optarg);
			fd = rtnl_open_netns(optarg);	/* descriptor for the namespace */
			if (fd == -1)
				errExit("open");
			if (setns(fd, 0) == -1)		/* join that namespace */
				errExit("setns");
			/* The descriptor is no longer needed; don't leak it into the shell. */
			close(fd);
			execvp(exec, shell_argv);	/* only returns on failure */
			errExit("execvp");
			break;
		case 's':
			pid = getpid();
			printf("option: -%c, optarg: %s\r\n", opt, optarg);
			printf("ll /proc/%d/ns/net\n", pid);
			if (netns_switch(optarg) < 0) {
				status = EXIT_FAILURE;
				break;
			}
			/* Stay alive so the namespace of this PID can be checked. */
			sleep(100);
			break;
		default:
			return 0;
		}
	}
	return status;
}
#Makefile
# Build the "test" program from main.c and linux_netns.c.
# Recipe lines must start with a TAB character.
.PHONY: all clean

all: test

test: main.o linux_netns.o
	gcc -o test main.o linux_netns.o

# Both objects include linux_netns.h, so rebuild them when it changes.
main.o: main.c linux_netns.h
	gcc -c main.c

linux_netns.o: linux_netns.c linux_netns.h
	gcc -c linux_netns.c

clean:
	rm -rf main.o linux_netns.o test
eg: ./test -d ns_name
此测试程序实现的功能有:
-a nsname 添加namespace
-d nsname 删除namespace
-s nsname 将程序切换到namespace ,sleep 100,方便执行命令ip netns identify PID,查看进程所属namespace
-e nsname 进入指定namespace并执行/bin/bash,和ip netns exec ns /bin/bash 功能一致
进程切换ns使用总结:
当一个进程创建多个ns,并且要在每个ns中添加虚拟接口(比如tap、tun口)时,建议先切换到默认ns中创建虚拟接口,这样创建出来的虚拟接口索引依次递增,不会重复(如果分别在不同ns中创建,接口索引可能重复),然后再将虚拟接口移动到目标ns中。要操作某个ns中的接口,需要先将进程切换到该ns,否则会找不到接口。
iplink_set_if_to_ns(int if_index, char * nsname) ,函数功能是将指定接口移动到指定ns。if_index就是接口的索引。
备注:以上代码移植自 iproute2-4.15.0
|