v6.2-r1 Causes GPF in dequeue_task() When Playing Games
Running Project-C v6.2-r1 triggers a kernel oops when playing games. Reverting to r0 resolves this specific issue. I decoded the oops and got this:
general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] PREEMPT SMP NOPTI
CPU: 5 PID: 5762 Comm: VoiceofCardsThe Tainted: G O 6.2.5-lqx1-1-lqx #1
Hardware name: ASUS System Product Name/Pro WS X570-ACE, BIOS 4402 02/03/2023
RIP: 0010:__schedule (./include/linux/list.h:114 ./include/linux/list.h:137 ./include/linux/list.h:148 kernel/sched/alt_core.c:806 kernel/sched/alt_core.c:1363 kernel/sched/alt_core.c:4784)
Code: 08 0d 00 00 49 c7 84 24 98 01 00 00 00 00 00 00 48 29 c2 49 01 94 24 88 01 00 00 66 90 49 8b 54 24 78 49 8b 84 24 80 00 00 00 <48> 89 42 08 48 89 10 49 63 4c 24 >
All code
========
0: 08 0d 00 00 49 c7 or %cl,-0x38b70000(%rip) # 0xffffffffc7490006
6: 84 24 98 test %ah,(%rax,%rbx,4)
9: 01 00 add %eax,(%rax)
b: 00 00 add %al,(%rax)
d: 00 00 add %al,(%rax)
f: 00 48 29 add %cl,0x29(%rax)
12: c2 49 01 ret $0x149
15: 94 xchg %eax,%esp
16: 24 88 and $0x88,%al
18: 01 00 add %eax,(%rax)
1a: 00 66 90 add %ah,-0x70(%rsi)
1d: 49 8b 54 24 78 mov 0x78(%r12),%rdx
22: 49 8b 84 24 80 00 00 mov 0x80(%r12),%rax
29: 00
2a:* 48 89 42 08 mov %rax,0x8(%rdx) <-- trapping instruction
2e: 48 89 10 mov %rdx,(%rax)
31: 49 63 4c 24 00 movslq 0x0(%r12),%rcx
Code starting with the faulting instruction
===========================================
0: 48 89 42 08 mov %rax,0x8(%rdx)
4: 48 89 10 mov %rdx,(%rax)
7: 49 63 4c 24 00 movslq 0x0(%r12),%rcx
RSP: 0018:ffffc90007bafd30 EFLAGS: 00010046
RAX: dead000000000122 RBX: ffff888fee970ec0 RCX: 0000000000000016
RDX: dead000000000100 RSI: fffffffc33d6ea99 RDI: 0000000000000190
RBP: ffffc90007bafdd8 R08: 0000000000000002 R09: 0000000000002c03
R10: ffffc90006d9fda8 R11: ffff888157ac1b20 R12: ffff8884e521a7c0
R13: 0000000000000000 R14: ffff888fee970ef0 R15: 00007effe7bf8e60
FS: 000000006d33f6c0(0000) GS:ffff888fee940000(0000) knlGS:000000007fd60000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000e0802528 CR3: 0000000193a4e000 CR4: 0000000000750ee0
PKRU: 55555554
Call Trace:
<TASK>
? get_futex_key (kernel/futex/core.c:393)
schedule (./arch/x86/include/asm/preempt.h:85 (discriminator 1) kernel/sched/alt_core.c:4917 (discriminator 1))
futex_wait_multiple (kernel/futex/waitwake.c:507 kernel/futex/waitwake.c:542)
__x64_sys_futex_waitv (kernel/futex/syscalls.c:298 kernel/futex/syscalls.c:246 kernel/futex/syscalls.c:246)
do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
RIP: 0033:0x7effea44a0dd
Code: 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b >
All code
========
0: 5d pop %rbp
1: c3 ret
2: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1)
9: 00 00 00
c: 90 nop
d: f3 0f 1e fa endbr64
11: 48 89 f8 mov %rdi,%rax
14: 48 89 f7 mov %rsi,%rdi
17: 48 89 d6 mov %rdx,%rsi
1a: 48 89 ca mov %rcx,%rdx
1d: 4d 89 c2 mov %r8,%r10
20: 4d 89 c8 mov %r9,%r8
23: 4c 8b 4c 24 08 mov 0x8(%rsp),%r9
28: 0f 05 syscall
2a:* 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax <-- trapping instruction
30: 73 01 jae 0x33
32: c3 ret
33: 48 8b 00 mov (%rax),%rax
Code starting with the faulting instruction
===========================================
0: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax
6: 73 01 jae 0x9
8: c3 ret
9: 48 8b 00 mov (%rax),%rax
RSP: 002b:000000006d33d938 EFLAGS: 00000246 ORIG_RAX: 00000000000001c1
RAX: ffffffffffffffda RBX: 00007effe8a269e0 RCX: 00007effea44a0dd
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 000000006d33dd90
RBP: 0000000000000001 R08: 0000000000000000 R09: 000000016d33e260
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
R13: 000000006d33d97c R14: 0000000000000000 R15: 000000006d2dfc30
</TASK>
Modules linked in: af_packet rfcomm cmac algif_hash algif_skcipher af_alg bnep btusb btrtl btbcm btintel btmtk bluetooth ecdh_generic nf_conntrack_netlink xfrm_user xf>
aesni_intel platform_profile crypto_simd sp5100_tco soundcore igb cryptd rfkill mxm_wmi wmi_bmof ccp k10temp pcspkr i2c_piix4 r8169 dca ipmi_devintf tpm_crb ipmi_msgh>
---[ end trace 0000000000000000 ]---
RIP: 0010:__schedule (./include/linux/list.h:114 ./include/linux/list.h:137 ./include/linux/list.h:148 kernel/sched/alt_core.c:806 kernel/sched/alt_core.c:1363 kernel/sched/alt_core.c:4784)
Code: 08 0d 00 00 49 c7 84 24 98 01 00 00 00 00 00 00 48 29 c2 49 01 94 24 88 01 00 00 66 90 49 8b 54 24 78 49 8b 84 24 80 00 00 00 <48> 89 42 08 48 89 10 49 63 4c 24 >
All code
========
0: 08 0d 00 00 49 c7 or %cl,-0x38b70000(%rip) # 0xffffffffc7490006
6: 84 24 98 test %ah,(%rax,%rbx,4)
9: 01 00 add %eax,(%rax)
b: 00 00 add %al,(%rax)
d: 00 00 add %al,(%rax)
f: 00 48 29 add %cl,0x29(%rax)
12: c2 49 01 ret $0x149
15: 94 xchg %eax,%esp
16: 24 88 and $0x88,%al
18: 01 00 add %eax,(%rax)
1a: 00 66 90 add %ah,-0x70(%rsi)
1d: 49 8b 54 24 78 mov 0x78(%r12),%rdx
22: 49 8b 84 24 80 00 00 mov 0x80(%r12),%rax
29: 00
2a:* 48 89 42 08 mov %rax,0x8(%rdx) <-- trapping instruction
2e: 48 89 10 mov %rdx,(%rax)
31: 49 63 4c 24 00 movslq 0x0(%r12),%rcx
Code starting with the faulting instruction
===========================================
0: 48 89 42 08 mov %rax,0x8(%rdx)
4: 48 89 10 mov %rdx,(%rax)
7: 49 63 4c 24 00 movslq 0x0(%r12),%rcx
RSP: 0018:ffffc90007bafd30 EFLAGS: 00010046
RAX: dead000000000122 RBX: ffff888fee970ec0 RCX: 0000000000000016
RDX: dead000000000100 RSI: fffffffc33d6ea99 RDI: 0000000000000190
RBP: ffffc90007bafdd8 R08: 0000000000000002 R09: 0000000000002c03
R10: ffffc90006d9fda8 R11: ffff888157ac1b20 R12: ffff8884e521a7c0
R13: 0000000000000000 R14: ffff888fee970ef0 R15: 00007effe7bf8e60
FS: 000000006d33f6c0(0000) GS:ffff888fee940000(0000) knlGS:000000007fd60000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000e0802528 CR3: 0000000193a4e000 CR4: 0000000000750ee0
PKRU: 55555554
alt_core.c:4784
/*
* __schedule() ttwu()
* prev_state = prev->state; if (p->on_rq && ...)
* if (prev_state) goto out;
* p->on_rq = 0; smp_acquire__after_ctrl_dep();
* p->state = TASK_WAKING
*
* Where __schedule() and ttwu() have matching control dependencies.
*
* After this, schedule() must not care about p->state any more.
*/
sched_task_deactivate(prev, rq);
--> deactivate_task(prev, rq);
deactivated = 1;
if (prev->in_iowait) {
atomic_inc(&rq->nr_iowait);
delayacct_blkio_start();
}
}
alt_core.c:1363
/*
* deactivate_task - remove a task from the runqueue.
*
* Context: rq->lock
*/
static inline void deactivate_task(struct task_struct *p, struct rq *rq)
{
p->on_rq = 0;
--> dequeue_task(p, rq, DEQUEUE_SLEEP);
cpufreq_update_util(rq, 0);
}
alt_core.c:806
static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags)
{
#ifdef ALT_SCHED_DEBUG
lockdep_assert_held(&rq->lock);
/*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/
WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n",
task_cpu(p), cpu_of(rq));
#endif
--> __SCHED_DEQUEUE_TASK(p, rq, flags);
--rq->nr_running;
#ifdef CONFIG_SMP
if (1 == rq->nr_running)
cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask);
#endif
sched_update_tick_dependency(rq);
}
alt_core.c:782 (line number guessed; this frame is not in the trace and is inferred from the next one, list.h:148)
/*
* Add/Remove/Requeue task to/from the runqueue routines
* Context: rq->lock
*/
#define __SCHED_DEQUEUE_TASK(p, rq, flags) \
sched_info_dequeue(rq, p); \
psi_dequeue(p, flags & DEQUEUE_SLEEP); \
\
--> list_del(&p->sq_node); \
if (list_empty(&rq->queue.heads[p->sq_idx])) \
clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \
update_sched_preempt_mask(rq);
list.h:148
/**
* list_del - deletes entry from list.
* @entry: the element to delete from the list.
* Note: list_empty() on entry does not return true after this, the entry is
* in an undefined state.
*/
static inline void list_del(struct list_head *entry)
{
--> __list_del_entry(entry);
entry->next = LIST_POISON1;
entry->prev = LIST_POISON2;
}
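What's odd about this one is that we're now hitting this same issue through simply deactivating a task, rather than through migrate_pending_tasks. Note that at the trapping instruction RDX and RAX hold 0xdead000000000100 and 0xdead000000000122, which match the LIST_POISON1/LIST_POISON2 values that list_del() writes into entry->next and entry->prev, so p->sq_node appears to have already been removed from its list when dequeue_task() calls list_del() on it again. This new oops also happens regardless of whether or not I apply @torvic9's patches for enqueuing/dequeuing tasks.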