Commit a39b8633 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'sched-core-for-linus' of...

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (31 commits)
  sched: fix warning in fs/proc/base.c
  schedstat: consolidate per-task cpu runtime stats
  sched: use RCU variant of list traversal in for_each_leaf_rt_rq()
  sched, cpuacct: export percpu cpuacct cgroup stats
  sched, cpuacct: refactoring cpuusage_read / cpuusage_write
  sched: optimize update_curr()
  sched: fix wakeup preemption clock
  sched: add missing arch_update_cpu_topology() call
  sched: let arch_update_cpu_topology indicate if topology changed
  sched: idle_balance() does not call load_balance_newidle()
  sched: fix sd_parent_degenerate on non-numa smp machine
  sched: add uid information to sched_debug for CONFIG_USER_SCHED
  sched: move double_unlock_balance() higher
  sched: update comment for move_task_off_dead_cpu
  sched: fix inconsistency when redistribute per-cpu tg->cfs_rq shares
  sched/rt: removed unneeded defintion
  sched: add hierarchical accounting to cpu accounting controller
  sched: include group statistics in /proc/sched_debug
  sched: rename SCHED_NO_NO_OMIT_FRAME_POINTER => SCHED_OMIT_FRAME_POINTER
  sched: clean up SCHED_CPUMASK_ALLOC
  ...
parents b0f4b285 4e202284
CPU Accounting Controller
-------------------------
The CPU accounting controller is used to group tasks using cgroups and
account the CPU usage of these groups of tasks.
The CPU accounting controller supports multi-hierarchy groups. An accounting
group accumulates the CPU usage of all of its child groups and the tasks
directly present in its group.
Accounting groups can be created by first mounting the cgroup filesystem.
# mkdir /cgroups
# mount -t cgroup -ocpuacct none /cgroups
With the above step, the initial or the parent accounting group
becomes visible at /cgroups. At bootup, this group includes all the
tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
this group which is essentially the CPU time obtained by all the tasks
in the system.
New accounting groups can be created under the parent group /cgroups.
# cd /cgroups
# mkdir g1
# echo $$ > g1
The above steps create a new group g1 and move the current shell
process (bash) into it. CPU time consumed by this bash and its children
can be obtained from g1/cpuacct.usage and the same is accumulated in
/cgroups/cpuacct.usage also.
......@@ -8,7 +8,7 @@ Context switch
By default, the switch_to arch function is called with the runqueue
locked. This is usually not a problem unless switch_to may need to
take the runqueue lock. This is usually due to a wake up operation in
the context switch. See include/asm-ia64/system.h for an example.
the context switch. See arch/ia64/include/asm/system.h for an example.
To request the scheduler call switch_to with the runqueue unlocked,
you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
......@@ -23,7 +23,7 @@ disabled. Interrupts may be enabled over the call if it is likely to
introduce a significant interrupt latency by adding the line
`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
unlocked context switches. This define also implies
`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an
`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an
example.
......
......@@ -99,7 +99,7 @@ config GENERIC_IOMAP
bool
default y
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
bool
default y
......
......@@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY
bool
default y
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
bool
default y
......
......@@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE
bool
default y
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
bool
default y
......
......@@ -141,7 +141,7 @@ config GENERIC_NVRAM
bool
default y if PPC32
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
bool
default y
......
......@@ -212,7 +212,7 @@ static void update_cpu_core_map(void)
cpu_core_map[cpu] = cpu_coregroup_map(cpu);
}
void arch_update_cpu_topology(void)
int arch_update_cpu_topology(void)
{
struct tl_info *info = tl_info;
struct sys_device *sysdev;
......@@ -221,7 +221,7 @@ void arch_update_cpu_topology(void)
if (!machine_has_topology) {
update_cpu_core_map();
topology_update_polarization_simple();
return;
return 0;
}
stsi(info, 15, 1, 2);
tl_to_cores(info);
......@@ -230,6 +230,7 @@ void arch_update_cpu_topology(void)
sysdev = get_cpu_sysdev(cpu);
kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
}
return 1;
}
static void topology_work_fn(struct work_struct *work)
......
......@@ -368,10 +368,10 @@ config X86_RDC321X
as R-8610-(G).
If you don't have one of these chips, you should say N here.
config SCHED_NO_NO_OMIT_FRAME_POINTER
config SCHED_OMIT_FRAME_POINTER
def_bool y
prompt "Single-depth WCHAN output"
depends on X86_32
depends on X86
help
Calculate simpler /proc/<PID>/wchan values. If this option
is disabled then wchan values will recurse back to the
......
......@@ -347,8 +347,8 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
static int proc_pid_schedstat(struct task_struct *task, char *buffer)
{
return sprintf(buffer, "%llu %llu %lu\n",
task->sched_info.cpu_time,
task->sched_info.run_delay,
(unsigned long long)task->se.sum_exec_runtime,
(unsigned long long)task->sched_info.run_delay,
task->sched_info.pcount);
}
#endif
......
......@@ -23,7 +23,7 @@
*/
#if defined(CONFIG_FRAME_POINTER) || \
!defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER)
!defined(CONFIG_SCHED_OMIT_FRAME_POINTER)
#define M32R_PUSH_FP " push fp\n"
#define M32R_POP_FP " pop fp\n"
#else
......
......@@ -260,8 +260,6 @@ static inline int select_nohz_load_balancer(int cpu)
}
#endif
extern unsigned long rt_needs_cpu(int cpu);
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
......@@ -669,8 +667,7 @@ struct reclaim_state;
struct sched_info {
/* cumulative counters */
unsigned long pcount; /* # of times run on this cpu */
unsigned long long cpu_time, /* time spent on the cpu */
run_delay; /* time spent waiting on a runqueue */
unsigned long long run_delay; /* time spent waiting on a runqueue */
/* timestamps */
unsigned long long last_arrival,/* when we last ran on a cpu */
......@@ -2210,6 +2207,7 @@ extern void normalize_rt_tasks(void);
extern struct task_group init_task_group;
#ifdef CONFIG_USER_SCHED
extern struct task_group root_task_group;
extern void set_tg_uid(struct user_struct *user);
#endif
extern struct task_group *sched_create_group(struct task_group *parent);
......
......@@ -49,7 +49,7 @@
for_each_online_node(node) \
if (nr_cpus_node(node))
void arch_update_cpu_topology(void);
int arch_update_cpu_topology(void);
/* Conform to ACPI 2.0 SLIT distance definitions */
#define LOCAL_DISTANCE 10
......
......@@ -19,7 +19,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_sched.o = -pg
endif
obj-$(CONFIG_FREEZER) += freezer.o
......@@ -90,7 +89,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only. Why this used to be enabled for all architectures is beyond
# me. I suspect most platforms don't need this, but until we know that for sure
......
......@@ -127,7 +127,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
*/
t1 = tsk->sched_info.pcount;
t2 = tsk->sched_info.run_delay;
t3 = tsk->sched_info.cpu_time;
t3 = tsk->se.sum_exec_runtime;
d->cpu_count += t1;
......
This diff is collapsed.
......@@ -53,6 +53,40 @@ static unsigned long nsec_low(unsigned long long nsec)
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
#ifdef CONFIG_FAIR_GROUP_SCHED
static void print_cfs_group_stats(struct seq_file *m, int cpu,
struct task_group *tg)
{
struct sched_entity *se = tg->se[cpu];
if (!se)
return;
#define P(F) \
SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
#define PN(F) \
SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
PN(se->exec_start);
PN(se->vruntime);
PN(se->sum_exec_runtime);
#ifdef CONFIG_SCHEDSTATS
PN(se->wait_start);
PN(se->sleep_start);
PN(se->block_start);
PN(se->sleep_max);
PN(se->block_max);
PN(se->exec_max);
PN(se->slice_max);
PN(se->wait_max);
PN(se->wait_sum);
P(se->wait_count);
#endif
P(se->load.weight);
#undef PN
#undef P
}
#endif
static void
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
......@@ -121,20 +155,19 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
char path[128] = "";
struct cgroup *cgroup = NULL;
struct task_group *tg = cfs_rq->tg;
if (tg)
cgroup = tg->css.cgroup;
if (cgroup)
cgroup_path(cgroup, path, sizeof(path));
cgroup_path(tg->css.cgroup, path, sizeof(path));
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
{
uid_t uid = cfs_rq->tg->uid;
SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid);
}
#else
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
#endif
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
SPLIT_NS(cfs_rq->exec_clock));
......@@ -168,6 +201,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#ifdef CONFIG_SMP
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
#endif
print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}
......@@ -175,14 +209,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
char path[128] = "";
struct cgroup *cgroup = NULL;
struct task_group *tg = rt_rq->tg;
if (tg)
cgroup = tg->css.cgroup;
if (cgroup)
cgroup_path(cgroup, path, sizeof(path));
cgroup_path(tg->css.cgroup, path, sizeof(path));
SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
#else
......@@ -272,7 +301,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n",
SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n",
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
......
......@@ -492,6 +492,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
* overflow on 32 bits):
*/
delta_exec = (unsigned long)(now - curr->exec_start);
if (!delta_exec)
return;
__update_curr(cfs_rq, curr, delta_exec);
curr->exec_start = now;
......@@ -1345,12 +1347,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
{
struct task_struct *curr = rq->curr;
struct sched_entity *se = &curr->se, *pse = &p->se;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
if (unlikely(rt_prio(p->prio))) {
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
update_curr(cfs_rq);
update_rq_clock(rq);
update_curr(cfs_rq);
if (unlikely(rt_prio(p->prio))) {
resched_task(curr);
return;
}
......
......@@ -77,7 +77,7 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
}
#define for_each_leaf_rt_rq(rt_rq, rq) \
list_for_each_entry(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
......@@ -537,13 +537,13 @@ static void update_curr_rt(struct rq *rq)
for_each_sched_rt_entity(rt_se) {
rt_rq = rt_rq_of_se(rt_se);
spin_lock(&rt_rq->rt_runtime_lock);
if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_time += delta_exec;
if (sched_rt_runtime_exceeded(rt_rq))
resched_task(curr);
spin_unlock(&rt_rq->rt_runtime_lock);
}
spin_unlock(&rt_rq->rt_runtime_lock);
}
}
......@@ -909,9 +909,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
/* Only try algorithms three times */
#define RT_MAX_TRIES 3
static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
static void double_unlock_balance(struct rq *this_rq, struct rq *busiest);
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
......
......@@ -31,7 +31,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
rq->sched_switch, rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
rq->rq_sched_info.cpu_time,
rq->rq_cpu_time,
rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
seq_printf(seq, "\n");
......@@ -123,7 +123,7 @@ static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{
if (rq)
rq->rq_sched_info.cpu_time += delta;
rq->rq_cpu_time += delta;
}
static inline void
......@@ -236,7 +236,6 @@ static inline void sched_info_depart(struct task_struct *t)
unsigned long long delta = task_rq(t)->clock -
t->sched_info.last_arrival;
t->sched_info.cpu_time += delta;
rq_sched_info_depart(task_rq(t), delta);
if (t->state == TASK_RUNNING)
......
......@@ -104,6 +104,8 @@ static int sched_create_user(struct user_struct *up)
if (IS_ERR(up->tg))
rc = -ENOMEM;
set_tg_uid(up);
return rc;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment