Add a simple struct nsset. It holds all necessary pieces to switch to a new
set of namespaces without leaving a task in a half-switched state which we
will make use of in the next patch. This patch switches the existing setns
logic over without causing a change in setns() behavior. This brings
setns() closer to how unshare() works(). The prepare_ns() function is
responsible to prepare all necessary information. This has two reasons.
First it minimizes dependencies between individual namespaces, i.e. all
install handler can expect that all fields are properly initialized
independent in what order they are called in. Second, this makes the code
easier to maintain and easier to follow if it needs to be changed.
The prepare_ns() helper will only be switched over to use a flags argument
in the next patch. Here it will still use nstype as a simple integer
argument which was argued would be clearer. I'm not particularly
opinionated about this if it really helps or not. The struct nsset itself
already contains the flags field since its name already indicates that it
can contain information required by different namespaces. None of this
should have functional consequences.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Jann Horn <jannh@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Link: https://lore.kernel.org/r/20200505140432.181565-2-christian.brauner@ubuntu.com
put_mnt_ns(to_mnt_ns(ns));
}
-static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int mntns_install(struct nsset *nsset, struct ns_common *ns)
{
- struct fs_struct *fs = current->fs;
+ struct nsproxy *nsproxy = nsset->nsproxy;
+ struct fs_struct *fs = nsset->fs;
struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
+ struct user_namespace *user_ns = nsset->cred->user_ns;
struct path root;
int err;
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(user_ns, CAP_SYS_CHROOT) ||
+ !ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (is_anon_ns(mnt_ns))
struct mnt_namespace;
struct fs_struct;
struct user_namespace;
+struct ns_common;
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
struct user_namespace *, struct fs_struct *);
};
extern struct nsproxy init_nsproxy;
+/*
+ * A structure to encompass all bits needed to install
+ * a partial or complete new set of namespaces.
+ *
+ * If a new user namespace is requested cred will
+ * point to a modifiable set of credentials. If a pointer
+ * to a modifiable set is needed nsset_cred() must be
+ * used and tested.
+ */
+struct nsset {
+ unsigned flags;
+ struct nsproxy *nsproxy;
+ struct fs_struct *fs;
+ const struct cred *cred;
+};
+
+static inline struct cred *nsset_cred(struct nsset *set)
+{
+ if (set->flags & CLONE_NEWUSER)
+ return (struct cred *)set->cred;
+
+ return NULL;
+}
+
/*
* the namespaces access rules are:
*
#include <linux/ns_common.h>
struct pid_namespace;
-struct nsproxy;
+struct nsset;
struct path;
struct task_struct;
struct inode;
int type;
struct ns_common *(*get)(struct task_struct *task);
void (*put)(struct ns_common *ns);
- int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
+ int (*install)(struct nsset *nsset, struct ns_common *ns);
struct user_namespace *(*owner)(struct ns_common *ns);
struct ns_common *(*get_parent)(struct ns_common *ns);
} __randomize_layout;
return put_ipc_ns(to_ipc_ns(ns));
}
-static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int ipcns_install(struct nsset *nsset, struct ns_common *new)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct ipc_namespace *ns = to_ipc_ns(new);
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
- /* Ditch state from the old ipc namespace */
- exit_sem(current);
put_ipc_ns(nsproxy->ipc_ns);
nsproxy->ipc_ns = get_ipc_ns(ns);
return 0;
return container_of(ns, struct cgroup_namespace, ns);
}
-static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int cgroupns_install(struct nsset *nsset, struct ns_common *ns)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
- if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
+ if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
#include <linux/time_namespace.h>
+#include <linux/fs_struct.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/syscalls.h>
switch_task_namespaces(p, NULL);
}
+static void put_nsset(struct nsset *nsset)
+{
+ unsigned flags = nsset->flags;
+
+ if (flags & CLONE_NEWUSER)
+ put_cred(nsset_cred(nsset));
+ if (nsset->nsproxy)
+ free_nsproxy(nsset->nsproxy);
+}
+
+static int prepare_nsset(int nstype, struct nsset *nsset)
+{
+ struct task_struct *me = current;
+
+ nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
+ if (IS_ERR(nsset->nsproxy))
+ return PTR_ERR(nsset->nsproxy);
+
+ if (nstype == CLONE_NEWUSER)
+ nsset->cred = prepare_creds();
+ else
+ nsset->cred = current_cred();
+ if (!nsset->cred)
+ goto out;
+
+ if (nstype == CLONE_NEWNS)
+ nsset->fs = me->fs;
+
+ nsset->flags = nstype;
+ return 0;
+
+out:
+ put_nsset(nsset);
+ return -ENOMEM;
+}
+
+/*
+ * This is the point of no return. There are just a few namespaces
+ * that do some actual work here and it's sufficiently minimal that
+ * a separate ns_common operation seems unnecessary for now.
+ * Unshare is doing the same thing. If we'll end up needing to do
+ * more in a given namespace or a helper here is ultimately not
+ * exported anymore a simple commit handler for each namespace
+ * should be added to ns_common.
+ */
+static void commit_nsset(struct nsset *nsset)
+{
+ unsigned flags = nsset->flags;
+ struct task_struct *me = current;
+
+#ifdef CONFIG_USER_NS
+ if (flags & CLONE_NEWUSER) {
+ /* transfer ownership */
+ commit_creds(nsset_cred(nsset));
+ nsset->cred = NULL;
+ }
+#endif
+
+#ifdef CONFIG_IPC_NS
+ if (flags & CLONE_NEWIPC)
+ exit_sem(me);
+#endif
+
+ /* transfer ownership */
+ switch_task_namespaces(me, nsset->nsproxy);
+ nsset->nsproxy = NULL;
+}
+
SYSCALL_DEFINE2(setns, int, fd, int, nstype)
{
- struct task_struct *tsk = current;
- struct nsproxy *new_nsproxy;
struct file *file;
struct ns_common *ns;
+ struct nsset nsset = {};
int err;
file = proc_ns_fget(fd);
if (nstype && (ns->ops->type != nstype))
goto out;
- new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
- if (IS_ERR(new_nsproxy)) {
- err = PTR_ERR(new_nsproxy);
+ err = prepare_nsset(ns->ops->type, &nsset);
+ if (err)
goto out;
- }
- err = ns->ops->install(new_nsproxy, ns);
- if (err) {
- free_nsproxy(new_nsproxy);
- goto out;
+ err = ns->ops->install(&nsset, ns);
+ if (!err) {
+ commit_nsset(&nsset);
+ perf_event_namespaces(current);
}
- switch_task_namespaces(tsk, new_nsproxy);
-
- perf_event_namespaces(tsk);
+ put_nsset(&nsset);
out:
fput(file);
return err;
put_pid_ns(to_pid_ns(ns));
}
-static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int pidns_install(struct nsset *nsset, struct ns_common *ns)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct pid_namespace *active = task_active_pid_ns(current);
struct pid_namespace *ancestor, *new = to_pid_ns(ns);
if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
/*
put_time_ns(to_time_ns(ns));
}
-static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int timens_install(struct nsset *nsset, struct ns_common *new)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct time_namespace *ns = to_time_ns(new);
int err;
return -EUSERS;
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
timens_set_vvar_page(current, ns);
put_user_ns(to_user_ns(ns));
}
-static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int userns_install(struct nsset *nsset, struct ns_common *ns)
{
struct user_namespace *user_ns = to_user_ns(ns);
struct cred *cred;
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
- cred = prepare_creds();
+ cred = nsset_cred(nsset);
if (!cred)
- return -ENOMEM;
+ return -EINVAL;
put_user_ns(cred->user_ns);
set_cred_user_ns(cred, get_user_ns(user_ns));
- return commit_creds(cred);
+ return 0;
}
struct ns_common *ns_get_owner(struct ns_common *ns)
put_uts_ns(to_uts_ns(ns));
}
-static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int utsns_install(struct nsset *nsset, struct ns_common *new)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct uts_namespace *ns = to_uts_ns(new);
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
get_uts_ns(ns);
put_net(to_net_ns(ns));
}
-static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int netns_install(struct nsset *nsset, struct ns_common *ns)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct net *net = to_net_ns(ns);
if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
put_net(nsproxy->net_ns);