summaryrefslogtreecommitdiffstats
path: root/meta/recipes-core/systemd/systemd/0027-proc-dont-trigger-mount-error-with-invalid-options-o.patch
blob: b1d3d6963c454525db0f2b26a6c8dfbe40450e3c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
From 297aba739cd689e4dc9f43bb1422ec88d481099a Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 13 Jan 2021 21:09:33 +0000
Subject: [PATCH] proc: dont trigger mount error with invalid options on old
 kernels

As of commit 4e39995371738b04d98d27b0d34ea8fe09ec9fab ("core: introduce
ProtectProc= and ProcSubset= to expose hidepid= and subset= procfs
mount options") kernels older than v5.8 generate multiple warnings at
boot, as seen in this Yocto build from today:

     qemux86-64 login: root
     [   65.829009] proc: Bad value for 'hidepid'
     root@qemux86-64:~# dmesg|grep proc:
     [   16.990706] proc: Bad value for 'hidepid'
     [   28.060178] proc: Bad value for 'hidepid'
     [   28.874229] proc: Bad value for 'hidepid'
     [   32.685107] proc: Bad value for 'hidepid'
     [   65.829009] proc: Bad value for 'hidepid'
     root@qemux86-64:~#

The systemd maintainer has dismissed this as something people should
simply ignore[1] and has no interest in trying to avoid it by
proactively checking the kernel version, so people can safely assume
that they will never see this version check commit upstream.

However, as can be seen above, telling people to just ignore it is not
an option, as we'll end up answering the same question and dealing with
the same bug over and over again.

The commit that triggers this is systemd v247-rc1~378^2~3 -- so any
systemd 247 and above plus kernel v5.7 or older will need this.

[1] https://github.com/systemd/systemd/issues/16896

Upstream-Status: Denied [https://github.com/systemd/systemd/issues/16896]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>

diff --git a/src/core/namespace.c b/src/core/namespace.c
index cdf427a6ea93..f8fc33a89fc2 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -4,7 +4,9 @@
 #include <linux/loop.h>
 #include <sched.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <sys/mount.h>
+#include <sys/utsname.h>
 #include <unistd.h>
 #include <linux/fs.h>
 
@@ -859,14 +861,34 @@ static int mount_sysfs(const MountEntry *m) {
 }
 
 static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
+        _cleanup_free_ char *opts = NULL;
         const char *entry_path;
-        int r;
+        int r, major, minor;
+        struct utsname uts;
+        bool old = false;
 
         assert(m);
         assert(ns_info);
 
         entry_path = mount_entry_path(m);
 
+        /* If uname says that the system is older than v5.8, then the textual hidepid= stuff is not
+         * supported by the kernel, and thus the per-instance hidepid= neither, which means we
+         * really don't want to use it, since it would affect our host's /proc mount. Hence let's
+         * gracefully fallback to a classic, unrestricted version. */
+
+        r = uname(&uts);
+        if (r < 0)
+                return -errno;
+
+        if (sscanf(uts.release, "%d.%d", &major, &minor) != 2)
+                major = minor = 0; /* Unparsable release string: treat it like a pre-v5.8 kernel. */
+
+        if (major < 5 || (major == 5 && minor < 8)) {
+                log_debug("Pre v5.8 kernel detected [v%d.%d] - skipping hidepid=", major, minor);
+                old = true;
+        }
+
         /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in
          * one. i.e we don't reuse existing mounts here under any condition, we want a new instance owned by
          * our user namespace and with our hidepid= settings applied. Hence, let's get rid of everything
@@ -875,9 +897,8 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
         (void) mkdir_p_label(entry_path, 0755);
         (void) umount_recursive(entry_path, 0);
 
-        if (ns_info->protect_proc != PROTECT_PROC_DEFAULT ||
-            ns_info->proc_subset != PROC_SUBSET_ALL) {
-                _cleanup_free_ char *opts = NULL;
+        if (!old && (ns_info->protect_proc != PROTECT_PROC_DEFAULT ||
+            ns_info->proc_subset != PROC_SUBSET_ALL)) {
 
                 /* Starting with kernel 5.8 procfs' hidepid= logic is truly per-instance (previously it
                  * pretended to be per-instance but actually was per-namespace), hence let's make use of it
@@ -891,21 +912,9 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
                                ns_info->proc_subset == PROC_SUBSET_PID ? ",subset=pid" : "");
                 if (!opts)
                         return -ENOMEM;
-
-                r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
-                if (r < 0) {
-                        if (r != -EINVAL)
-                                return r;
-
-                        /* If this failed with EINVAL then this likely means the textual hidepid= stuff is
-                         * not supported by the kernel, and thus the per-instance hidepid= neither, which
-                         * means we really don't want to use it, since it would affect our host's /proc
-                         * mount. Hence let's gracefully fallback to a classic, unrestricted version. */
-                } else
-                        return 1;
         }
 
-        r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+        r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
         if (r < 0)
                 return r;
 
-- 
2.29.2