summaryrefslogtreecommitdiffstats
path: root/meta/recipes-kernel/lttng/lttng-modules/0003-fix-sched-tracing-Don-t-re-read-p-state-when-emittin.patch
blob: afe514de82c140270216b9d2e9e27da52d132080 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
From 8e52fd71e693619f7a58de2692e59f0c826e9988 Mon Sep 17 00:00:00 2001
From: Michael Jeanson <mjeanson@efficios.com>
Date: Mon, 4 Apr 2022 13:52:57 -0400
Subject: [PATCH 03/10] fix: sched/tracing: Don't re-read p->state when
 emitting sched_switch event (v5.18)

See upstream commit:

  commit fa2c3254d7cfff5f7a916ab928a562d1165f17bb
  Author: Valentin Schneider <valentin.schneider@arm.com>
  Date:   Thu Jan 20 16:25:19 2022 +0000

    sched/tracing: Don't re-read p->state when emitting sched_switch event

    As of commit

      c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu")

    the following sequence becomes possible:

                          p->__state = TASK_INTERRUPTIBLE;
                          __schedule()
                            deactivate_task(p);
      ttwu()
        READ !p->on_rq
        p->__state=TASK_WAKING
                            trace_sched_switch()
                              __trace_sched_switch_state()
                                task_state_index()
                                  return 0;

    TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in
    the trace event.

    Prevent this by pushing the value read from __schedule() down the trace
    event.

Upstream-Status: Backport

Change-Id: I46743cd006be4b4d573cae2d77df7d6d16744d04
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 include/instrumentation/events/sched.h | 88 +++++++++++++++++++++++---
 1 file changed, 78 insertions(+), 10 deletions(-)

diff --git a/include/instrumentation/events/sched.h b/include/instrumentation/events/sched.h
index 91953a6f..339bec94 100644
--- a/include/instrumentation/events/sched.h
+++ b/include/instrumentation/events/sched.h
@@ -20,7 +20,37 @@
 #ifndef _TRACE_SCHED_DEF_
 #define _TRACE_SCHED_DEF_
 
-#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0))
+#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0))
+
+static inline long __trace_sched_switch_state(bool preempt,
+		unsigned int prev_state,
+		struct task_struct *p)
+{
+        unsigned int state;
+
+#ifdef CONFIG_SCHED_DEBUG
+        BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
+
+        /*
+         * Preemption ignores task state, therefore preempted tasks are always
+         * RUNNING (we will not have dequeued if state != RUNNING).
+         */
+        if (preempt)
+                return TASK_REPORT_MAX;
+
+        /*
+         * __task_state_index() uses fls() and returns a value from 0-8 range.
+         * Decrement it by 1 (except TASK_RUNNING state, i.e. 0) before using
+         * it for left shift operation to get the correct task->state
+         * mapping.
+         */
+	state = __task_state_index(prev_state, p->exit_state);
+
+        return state ? (1 << (state - 1)) : state;
+}
+
+#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0))
 
 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
 {
@@ -321,43 +351,81 @@ LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
 /*
  * Tracepoint for task switches, performed by the scheduler:
  */
+
+#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0))
 LTTNG_TRACEPOINT_EVENT(sched_switch,
 
-#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0))
 	TP_PROTO(bool preempt,
-		 struct task_struct *prev,
-		 struct task_struct *next),
+		unsigned int prev_state,
+		struct task_struct *prev,
+		struct task_struct *next),
 
-	TP_ARGS(preempt, prev, next),
+	TP_ARGS(preempt, prev_state, prev, next),
+
+	TP_FIELDS(
+		ctf_array_text(char, prev_comm,	prev->comm, TASK_COMM_LEN)
+		ctf_integer(pid_t, prev_tid, prev->pid)
+		ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
+#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
+		ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
 #else
-	TP_PROTO(struct task_struct *prev,
+		ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
+#endif
+		ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
+		ctf_integer(pid_t, next_tid, next->pid)
+		ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
+	)
+)
+
+#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0))
+
+LTTNG_TRACEPOINT_EVENT(sched_switch,
+
+	TP_PROTO(bool preempt,
+		 struct task_struct *prev,
 		 struct task_struct *next),
 
-	TP_ARGS(prev, next),
-#endif /* #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) */
+	TP_ARGS(preempt, prev, next),
 
 	TP_FIELDS(
 		ctf_array_text(char, prev_comm,	prev->comm, TASK_COMM_LEN)
 		ctf_integer(pid_t, prev_tid, prev->pid)
 		ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
-#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0))
 #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
 		ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev))
 #else
 		ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
 #endif
+		ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
+		ctf_integer(pid_t, next_tid, next->pid)
+		ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
+	)
+)
+
 #else
+
+LTTNG_TRACEPOINT_EVENT(sched_switch,
+
+	TP_PROTO(struct task_struct *prev,
+		 struct task_struct *next),
+
+	TP_ARGS(prev, next),
+
+	TP_FIELDS(
+		ctf_array_text(char, prev_comm,	prev->comm, TASK_COMM_LEN)
+		ctf_integer(pid_t, prev_tid, prev->pid)
+		ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
 #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
 		ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(prev))
 #else
 		ctf_integer(long, prev_state, __trace_sched_switch_state(prev))
-#endif
 #endif
 		ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
 		ctf_integer(pid_t, next_tid, next->pid)
 		ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
 	)
 )
+#endif
 
 /*
  * Tracepoint for a task being migrated:
-- 
2.19.1