diff options
Diffstat (limited to 'recipes/linux/linux-omap-2.6.37/base/0010-Miracle-patch.patch')
-rw-r--r-- | recipes/linux/linux-omap-2.6.37/base/0010-Miracle-patch.patch | 504 |
1 files changed, 504 insertions, 0 deletions
diff --git a/recipes/linux/linux-omap-2.6.37/base/0010-Miracle-patch.patch b/recipes/linux/linux-omap-2.6.37/base/0010-Miracle-patch.patch new file mode 100644 index 0000000000..c5eba83d35 --- /dev/null +++ b/recipes/linux/linux-omap-2.6.37/base/0010-Miracle-patch.patch @@ -0,0 +1,504 @@ +From ce4f1f734efd638af01f1849ffffdc2746ad4a55 Mon Sep 17 00:00:00 2001 +From: Mike Galbraith <efault@gmx.de> +Date: Fri, 19 Nov 2010 12:52:42 +0100 +Subject: [PATCH 10/28] Miracle patch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +On Sun, 2010-11-14 at 16:26 -0800, Linus Torvalds wrote: +> On Sun, Nov 14, 2010 at 4:15 PM, Linus Torvalds +> <torvalds@linux-foundation.org> wrote: +> > +> > THAT is why I think it's so silly to try to be so strict and walk over +> > all processes while holding a couple of spinlocks. +> +> Btw, let me say that I think the patch is great even with that thing +> in. It looks clean, the thing I'm complaining about is not a big deal, +> and it seems to perform very much as advertized. The difference with +> autogroup scheduling is very noticeable with a simple "make -j64" +> kernel compile. +> +> So I really don't think it's a big deal. The sysctl handler isn't even +> complicated. But boy does it hurt my eyes to see a spinlock held +> around a "do_each_thread()". And I do get the feeling that the +> simplest way to fix it would be to just remove the code entirely, and +> just say that "enabling/disabling may be delayed for old processes +> with existing autogroups". + +Which is what I just did. If the oddball case isn't a big deal, the +patch shrinks, which is a good thing. I just wanted to cover all bases. + +Patchlet with handler whacked: + +A recurring complaint from CFS users is that parallel kbuild has a negative +impact on desktop interactivity. This patch implements an idea from Linus, +to automatically create task groups. 
This patch only implements Linus' per +tty task group suggestion, and only for fair class tasks, but leaves the way +open for enhancement. + +Implementation: each task's signal struct contains an inherited pointer to a +refcounted autogroup struct containing a task group pointer, the default for +all tasks pointing to the init_task_group. When a task calls __proc_set_tty(), +the process-wide reference to the default group is dropped, a new task group is +created, and the process is moved into the new task group. Children thereafter +inherit this task group, and increase its refcount. On exit, a reference to the +current task group is dropped when the last reference to each signal struct is +dropped. The task group is destroyed when the last signal struct referencing +it is freed. At runqueue selection time, IFF a task has no cgroup assignment, +its current autogroup is used. + +The feature is enabled from boot by default if CONFIG_SCHED_AUTOGROUP is +selected, but can be disabled via the boot option noautogroup, and can +also be turned on/off on the fly via.. + echo [01] > /proc/sys/kernel/sched_autogroup_enabled. +..which will automatically move tasks to/from the root task group. + +Some numbers. 
+ +A 100% hog overhead measurement proggy pinned to the same CPU as a make -j10 + +About measurement proggy: + pert/sec = perturbations/sec + min/max/avg = scheduler service latencies in usecs + sum/s = time accrued by the competition per sample period (1 sec here) + overhead = %CPU received by the competition per sample period + +pert/s: 31 >40475.37us: 3 min: 0.37 max:48103.60 avg:29573.74 sum/s:916786us overhead:90.24% +pert/s: 23 >41237.70us: 12 min: 0.36 max:56010.39 avg:40187.01 sum/s:924301us overhead:91.99% +pert/s: 24 >42150.22us: 12 min: 8.86 max:61265.91 avg:39459.91 sum/s:947038us overhead:92.20% +pert/s: 26 >42344.91us: 11 min: 3.83 max:52029.60 avg:36164.70 sum/s:940282us overhead:91.12% +pert/s: 24 >44262.90us: 14 min: 5.05 max:82735.15 avg:40314.33 sum/s:967544us overhead:92.22% + +Same load with this patch applied. + +pert/s: 229 >5484.43us: 41 min: 0.15 max:12069.42 avg:2193.81 sum/s:502382us overhead:50.24% +pert/s: 222 >5652.28us: 43 min: 0.46 max:12077.31 avg:2248.56 sum/s:499181us overhead:49.92% +pert/s: 211 >5809.38us: 43 min: 0.16 max:12064.78 avg:2381.70 sum/s:502538us overhead:50.25% +pert/s: 223 >6147.92us: 43 min: 0.15 max:16107.46 avg:2282.17 sum/s:508925us overhead:50.49% +pert/s: 218 >6252.64us: 43 min: 0.16 max:12066.13 avg:2324.11 sum/s:506656us overhead:50.27% + +Average service latency is an order of magnitude better with autogroup. +(Imagine that pert were Xorg or whatnot instead) + +Using Mathieu Desnoyers' wakeup-latency testcase: + +With taskset -c 3 make -j 10 running.. 
+ +taskset -c 3 ./wakeup-latency& sleep 30;killall wakeup-latency + +without: +maximum latency: 42963.2 µs +average latency: 9077.0 µs +missed timer events: 0 + +with: +maximum latency: 4160.7 µs +average latency: 149.4 µs +missed timer events: 0 + +Signed-off-by: Mike Galbraith <efault@gmx.de> +--- + Documentation/kernel-parameters.txt | 2 + + drivers/tty/tty_io.c | 1 + + include/linux/sched.h | 19 +++++ + init/Kconfig | 12 +++ + kernel/fork.c | 5 +- + kernel/sched.c | 25 ++++-- + kernel/sched_autogroup.c | 140 +++++++++++++++++++++++++++++++++++ + kernel/sched_autogroup.h | 18 +++++ + kernel/sysctl.c | 11 +++ + 9 files changed, 224 insertions(+), 9 deletions(-) + create mode 100644 kernel/sched_autogroup.c + create mode 100644 kernel/sched_autogroup.h + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index 01ece1b..1031923 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -1622,6 +1622,8 @@ and is between 256 and 4096 characters. It is defined in the file + noapic [SMP,APIC] Tells the kernel to not make use of any + IOAPICs that may be present in the system. + ++ noautogroup Disable scheduler automatic task group creation. ++ + nobats [PPC] Do not use BATs for mapping kernel lowmem + on "Classic" PPC cores. 
+ +diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c +index 35480dd..1849f4a 100644 +--- a/drivers/tty/tty_io.c ++++ b/drivers/tty/tty_io.c +@@ -3169,6 +3169,7 @@ static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty) + put_pid(tsk->signal->tty_old_pgrp); + tsk->signal->tty = tty_kref_get(tty); + tsk->signal->tty_old_pgrp = NULL; ++ sched_autogroup_create_attach(tsk); + } + + static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty) +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 2238745..3a775e3 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -509,6 +509,8 @@ struct thread_group_cputimer { + spinlock_t lock; + }; + ++struct autogroup; ++ + /* + * NOTE! "signal_struct" does not have it's own + * locking, because a shared signal_struct always +@@ -576,6 +578,9 @@ struct signal_struct { + + struct tty_struct *tty; /* NULL if no tty */ + ++#ifdef CONFIG_SCHED_AUTOGROUP ++ struct autogroup *autogroup; ++#endif + /* + * Cumulative resource counters for dead threads in the group, + * and for reaped dead child processes forked by this group. 
+@@ -1931,6 +1936,20 @@ int sched_rt_handler(struct ctl_table *table, int write, + + extern unsigned int sysctl_sched_compat_yield; + ++#ifdef CONFIG_SCHED_AUTOGROUP ++extern unsigned int sysctl_sched_autogroup_enabled; ++ ++extern void sched_autogroup_create_attach(struct task_struct *p); ++extern void sched_autogroup_detach(struct task_struct *p); ++extern void sched_autogroup_fork(struct signal_struct *sig); ++extern void sched_autogroup_exit(struct signal_struct *sig); ++#else ++static inline void sched_autogroup_create_attach(struct task_struct *p) { } ++static inline void sched_autogroup_detach(struct task_struct *p) { } ++static inline void sched_autogroup_fork(struct signal_struct *sig) { } ++static inline void sched_autogroup_exit(struct signal_struct *sig) { } ++#endif ++ + #ifdef CONFIG_RT_MUTEXES + extern int rt_mutex_getprio(struct task_struct *p); + extern void rt_mutex_setprio(struct task_struct *p, int prio); +diff --git a/init/Kconfig b/init/Kconfig +index c972899..a4985d9 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -741,6 +741,18 @@ config NET_NS + + endif # NAMESPACES + ++config SCHED_AUTOGROUP ++ bool "Automatic process group scheduling" ++ select CGROUPS ++ select CGROUP_SCHED ++ select FAIR_GROUP_SCHED ++ help ++ This option optimizes the scheduler for common desktop workloads by ++ automatically creating and populating task groups. This separation ++ of workloads isolates aggressive CPU burners (like build jobs) from ++ desktop applications. Task group autogeneration is currently based ++ upon task tty association. 
++ + config MM_OWNER + bool + +diff --git a/kernel/fork.c b/kernel/fork.c +index 5447dc7..70ea75f 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig) + + static inline void put_signal_struct(struct signal_struct *sig) + { +- if (atomic_dec_and_test(&sig->sigcnt)) ++ if (atomic_dec_and_test(&sig->sigcnt)) { ++ sched_autogroup_exit(sig); + free_signal_struct(sig); ++ } + } + + void __put_task_struct(struct task_struct *tsk) +@@ -905,6 +907,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) + posix_cpu_timers_init_group(sig); + + tty_audit_fork(sig); ++ sched_autogroup_fork(sig); + + sig->oom_adj = current->signal->oom_adj; + sig->oom_score_adj = current->signal->oom_score_adj; +diff --git a/kernel/sched.c b/kernel/sched.c +index 297d1a0..53ff9a1 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -78,6 +78,7 @@ + + #include "sched_cpupri.h" + #include "workqueue_sched.h" ++#include "sched_autogroup.h" + + #define CREATE_TRACE_POINTS + #include <trace/events/sched.h> +@@ -605,11 +606,14 @@ static inline int cpu_of(struct rq *rq) + */ + static inline struct task_group *task_group(struct task_struct *p) + { ++ struct task_group *tg; + struct cgroup_subsys_state *css; + + css = task_subsys_state_check(p, cpu_cgroup_subsys_id, + lockdep_is_held(&task_rq(p)->lock)); +- return container_of(css, struct task_group, css); ++ tg = container_of(css, struct task_group, css); ++ ++ return autogroup_task_group(p, tg); + } + + /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ +@@ -2063,6 +2067,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) + #include "sched_idletask.c" + #include "sched_fair.c" + #include "sched_rt.c" ++#include "sched_autogroup.c" + #include "sched_stoptask.c" + #ifdef CONFIG_SCHED_DEBUG + # include "sched_debug.c" +@@ -8164,7 +8169,7 @@ void __init sched_init(void) + #ifdef CONFIG_CGROUP_SCHED + 
list_add(&init_task_group.list, &task_groups); + INIT_LIST_HEAD(&init_task_group.children); +- ++ autogroup_init(&init_task); + #endif /* CONFIG_CGROUP_SCHED */ + + #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP +@@ -8694,15 +8699,11 @@ void sched_destroy_group(struct task_group *tg) + /* change task's runqueue when it moves between groups. + * The caller of this function should have put the task in its new group + * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to +- * reflect its new group. ++ * reflect its new group. Called with the runqueue lock held. + */ +-void sched_move_task(struct task_struct *tsk) ++void __sched_move_task(struct task_struct *tsk, struct rq *rq) + { + int on_rq, running; +- unsigned long flags; +- struct rq *rq; +- +- rq = task_rq_lock(tsk, &flags); + + running = task_current(rq, tsk); + on_rq = tsk->se.on_rq; +@@ -8723,7 +8724,15 @@ void sched_move_task(struct task_struct *tsk) + tsk->sched_class->set_curr_task(rq); + if (on_rq) + enqueue_task(rq, tsk, 0); ++} + ++void sched_move_task(struct task_struct *tsk) ++{ ++ struct rq *rq; ++ unsigned long flags; ++ ++ rq = task_rq_lock(tsk, &flags); ++ __sched_move_task(tsk, rq); + task_rq_unlock(rq, &flags); + } + #endif /* CONFIG_CGROUP_SCHED */ +diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c +new file mode 100644 +index 0000000..62f1d0e +--- /dev/null ++++ b/kernel/sched_autogroup.c +@@ -0,0 +1,140 @@ ++#ifdef CONFIG_SCHED_AUTOGROUP ++ ++unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; ++ ++struct autogroup { ++ struct kref kref; ++ struct task_group *tg; ++}; ++ ++static struct autogroup autogroup_default; ++ ++static void autogroup_init(struct task_struct *init_task) ++{ ++ autogroup_default.tg = &init_task_group; ++ kref_init(&autogroup_default.kref); ++ init_task->signal->autogroup = &autogroup_default; ++} ++ ++static inline void autogroup_destroy(struct kref *kref) ++{ ++ struct autogroup *ag = container_of(kref, struct 
autogroup, kref); ++ struct task_group *tg = ag->tg; ++ ++ kfree(ag); ++ sched_destroy_group(tg); ++} ++ ++static inline void autogroup_kref_put(struct autogroup *ag) ++{ ++ kref_put(&ag->kref, autogroup_destroy); ++} ++ ++static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) ++{ ++ kref_get(&ag->kref); ++ return ag; ++} ++ ++static inline struct autogroup *autogroup_create(void) ++{ ++ struct autogroup *ag = kmalloc(sizeof(*ag), GFP_KERNEL); ++ ++ if (!ag) ++ goto out_fail; ++ ++ ag->tg = sched_create_group(&init_task_group); ++ kref_init(&ag->kref); ++ ++ if (!(IS_ERR(ag->tg))) ++ return ag; ++ ++out_fail: ++ if (ag) { ++ kfree(ag); ++ WARN_ON(1); ++ } else ++ WARN_ON(1); ++ ++ return autogroup_kref_get(&autogroup_default); ++} ++ ++static inline struct task_group * ++autogroup_task_group(struct task_struct *p, struct task_group *tg) ++{ ++ int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); ++ ++ enabled &= (tg == &root_task_group); ++ enabled &= (p->sched_class == &fair_sched_class); ++ enabled &= (!(p->flags & PF_EXITING)); ++ ++ if (enabled) ++ return p->signal->autogroup->tg; ++ ++ return tg; ++} ++ ++static void ++autogroup_move_group(struct task_struct *p, struct autogroup *ag) ++{ ++ struct autogroup *prev; ++ struct task_struct *t; ++ struct rq *rq; ++ unsigned long flags; ++ ++ rq = task_rq_lock(p, &flags); ++ prev = p->signal->autogroup; ++ if (prev == ag) { ++ task_rq_unlock(rq, &flags); ++ return; ++ } ++ ++ p->signal->autogroup = autogroup_kref_get(ag); ++ __sched_move_task(p, rq); ++ task_rq_unlock(rq, &flags); ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(t, &p->thread_group, thread_group) { ++ sched_move_task(t); ++ } ++ rcu_read_unlock(); ++ ++ autogroup_kref_put(prev); ++} ++ ++void sched_autogroup_create_attach(struct task_struct *p) ++{ ++ struct autogroup *ag = autogroup_create(); ++ ++ autogroup_move_group(p, ag); ++ /* drop extra refrence added by autogroup_create() */ ++ autogroup_kref_put(ag); ++} 
++EXPORT_SYMBOL(sched_autogroup_create_attach); ++ ++/* currently has no users */ ++void sched_autogroup_detach(struct task_struct *p) ++{ ++ autogroup_move_group(p, &autogroup_default); ++} ++EXPORT_SYMBOL(sched_autogroup_detach); ++ ++void sched_autogroup_fork(struct signal_struct *sig) ++{ ++ sig->autogroup = autogroup_kref_get(current->signal->autogroup); ++} ++ ++void sched_autogroup_exit(struct signal_struct *sig) ++{ ++ autogroup_kref_put(sig->autogroup); ++} ++ ++static int __init setup_autogroup(char *str) ++{ ++ sysctl_sched_autogroup_enabled = 0; ++ ++ return 1; ++} ++ ++__setup("noautogroup", setup_autogroup); ++#endif +diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h +new file mode 100644 +index 0000000..6048f5d +--- /dev/null ++++ b/kernel/sched_autogroup.h +@@ -0,0 +1,18 @@ ++#ifdef CONFIG_SCHED_AUTOGROUP ++ ++static void __sched_move_task(struct task_struct *tsk, struct rq *rq); ++ ++static inline struct task_group * ++autogroup_task_group(struct task_struct *p, struct task_group *tg); ++ ++#else /* !CONFIG_SCHED_AUTOGROUP */ ++ ++static inline void autogroup_init(struct task_struct *init_task) { } ++ ++static inline struct task_group * ++autogroup_task_group(struct task_struct *p, struct task_group *tg) ++{ ++ return tg; ++} ++ ++#endif /* CONFIG_SCHED_AUTOGROUP */ +diff --git a/kernel/sysctl.c b/kernel/sysctl.c +index 5abfa15..b162f65 100644 +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -382,6 +382,17 @@ static struct ctl_table kern_table[] = { + .mode = 0644, + .proc_handler = proc_dointvec, + }, ++#ifdef CONFIG_SCHED_AUTOGROUP ++ { ++ .procname = "sched_autogroup_enabled", ++ .data = &sysctl_sched_autogroup_enabled, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, /* honours the extra1/extra2 range; plain proc_dointvec ignores it */ ++ .extra1 = &zero, ++ .extra2 = &one, ++ }, ++#endif + #ifdef CONFIG_PROVE_LOCKING + { + .procname = "prove_locking", +-- +1.6.6.1 + |