v6.2-r1 Causes GPF With Dequeue Task When Playing Games
Running Project-C v6.2-r1 gets me a kernel oops when playing games. Restoring to r0 resolves this specific issue. I did a decode and got this: ``` general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] PREEMPT SMP NOPTI CPU: 5 PID: 5762 Comm: VoiceofCardsThe Tainted: G O 6.2.5-lqx1-1-lqx #1 Hardware name: ASUS System Product Name/Pro WS X570-ACE, BIOS 4402 02/03/2023 RIP: 0010:__schedule (./include/linux/list.h:114 ./include/linux/list.h:137 ./include/linux/list.h:148 kernel/sched/alt_core.c:806 kernel/sched/alt_core.c:1363 kernel/sched/alt_core.c:4784) Code: 08 0d 00 00 49 c7 84 24 98 01 00 00 00 00 00 00 48 29 c2 49 01 94 24 88 01 00 00 66 90 49 8b 54 24 78 49 8b 84 24 80 00 00 00 <48> 89 42 08 48 89 10 49 63 4c 24 > All code ======== 0: 08 0d 00 00 49 c7 or %cl,-0x38b70000(%rip) # 0xffffffffc7490006 6: 84 24 98 test %ah,(%rax,%rbx,4) 9: 01 00 add %eax,(%rax) b: 00 00 add %al,(%rax) d: 00 00 add %al,(%rax) f: 00 48 29 add %cl,0x29(%rax) 12: c2 49 01 ret $0x149 15: 94 xchg %eax,%esp 16: 24 88 and $0x88,%al 18: 01 00 add %eax,(%rax) 1a: 00 66 90 add %ah,-0x70(%rsi) 1d: 49 8b 54 24 78 mov 0x78(%r12),%rdx 22: 49 8b 84 24 80 00 00 mov 0x80(%r12),%rax 29: 00 2a:* 48 89 42 08 mov %rax,0x8(%rdx) <-- trapping instruction 2e: 48 89 10 mov %rdx,(%rax) 31: 49 63 4c 24 00 movslq 0x0(%r12),%rcx Code starting with the faulting instruction =========================================== 0: 48 89 42 08 mov %rax,0x8(%rdx) 4: 48 89 10 mov %rdx,(%rax) 7: 49 63 4c 24 00 movslq 0x0(%r12),%rcx RSP: 0018:ffffc90007bafd30 EFLAGS: 00010046 RAX: dead000000000122 RBX: ffff888fee970ec0 RCX: 0000000000000016 RDX: dead000000000100 RSI: fffffffc33d6ea99 RDI: 0000000000000190 RBP: ffffc90007bafdd8 R08: 0000000000000002 R09: 0000000000002c03 R10: ffffc90006d9fda8 R11: ffff888157ac1b20 R12: ffff8884e521a7c0 R13: 0000000000000000 R14: ffff888fee970ef0 R15: 00007effe7bf8e60 FS: 000000006d33f6c0(0000) GS:ffff888fee940000(0000) knlGS:000000007fd60000 CS: 0010 DS: 0000 
ES: 0000 CR0: 0000000080050033 CR2: 00000000e0802528 CR3: 0000000193a4e000 CR4: 0000000000750ee0 PKRU: 55555554 Call Trace: <TASK> ? get_futex_key (kernel/futex/core.c:393) schedule (./arch/x86/include/asm/preempt.h:85 (discriminator 1) kernel/sched/alt_core.c:4917 (discriminator 1)) futex_wait_multiple (kernel/futex/waitwake.c:507 kernel/futex/waitwake.c:542) __x64_sys_futex_waitv (kernel/futex/syscalls.c:298 kernel/futex/syscalls.c:246 kernel/futex/syscalls.c:246) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) RIP: 0033:0x7effea44a0dd Code: 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b > All code ======== 0: 5d pop %rbp 1: c3 ret 2: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1) 9: 00 00 00 c: 90 nop d: f3 0f 1e fa endbr64 11: 48 89 f8 mov %rdi,%rax 14: 48 89 f7 mov %rsi,%rdi 17: 48 89 d6 mov %rdx,%rsi 1a: 48 89 ca mov %rcx,%rdx 1d: 4d 89 c2 mov %r8,%r10 20: 4d 89 c8 mov %r9,%r8 23: 4c 8b 4c 24 08 mov 0x8(%rsp),%r9 28: 0f 05 syscall 2a:* 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax <-- trapping instruction 30: 73 01 jae 0x33 32: c3 ret 33: 48 8b 00 mov (%rax),%rax Code starting with the faulting instruction =========================================== 0: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax 6: 73 01 jae 0x9 8: c3 ret 9: 48 8b 00 mov (%rax),%rax RSP: 002b:000000006d33d938 EFLAGS: 00000246 ORIG_RAX: 00000000000001c1 RAX: ffffffffffffffda RBX: 00007effe8a269e0 RCX: 00007effea44a0dd RDX: 0000000000000000 RSI: 0000000000000001 RDI: 000000006d33dd90 RBP: 0000000000000001 R08: 0000000000000000 R09: 000000016d33e260 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 000000006d33d97c R14: 0000000000000000 R15: 000000006d2dfc30 </TASK> Modules linked in: af_packet rfcomm cmac algif_hash algif_skcipher af_alg bnep btusb btrtl btbcm btintel btmtk 
bluetooth ecdh_generic nf_conntrack_netlink xfrm_user xf> aesni_intel platform_profile crypto_simd sp5100_tco soundcore igb cryptd rfkill mxm_wmi wmi_bmof ccp k10temp pcspkr i2c_piix4 r8169 dca ipmi_devintf tpm_crb ipmi_msgh> ---[ end trace 0000000000000000 ]--- RIP: 0010:__schedule (./include/linux/list.h:114 ./include/linux/list.h:137 ./include/linux/list.h:148 kernel/sched/alt_core.c:806 kernel/sched/alt_core.c:1363 kernel/sched/alt_core.c:4784) Code: 08 0d 00 00 49 c7 84 24 98 01 00 00 00 00 00 00 48 29 c2 49 01 94 24 88 01 00 00 66 90 49 8b 54 24 78 49 8b 84 24 80 00 00 00 <48> 89 42 08 48 89 10 49 63 4c 24 > All code ======== 0: 08 0d 00 00 49 c7 or %cl,-0x38b70000(%rip) # 0xffffffffc7490006 6: 84 24 98 test %ah,(%rax,%rbx,4) 9: 01 00 add %eax,(%rax) b: 00 00 add %al,(%rax) d: 00 00 add %al,(%rax) f: 00 48 29 add %cl,0x29(%rax) 12: c2 49 01 ret $0x149 15: 94 xchg %eax,%esp 16: 24 88 and $0x88,%al 18: 01 00 add %eax,(%rax) 1a: 00 66 90 add %ah,-0x70(%rsi) 1d: 49 8b 54 24 78 mov 0x78(%r12),%rdx 22: 49 8b 84 24 80 00 00 mov 0x80(%r12),%rax 29: 00 2a:* 48 89 42 08 mov %rax,0x8(%rdx) <-- trapping instruction 2e: 48 89 10 mov %rdx,(%rax) 31: 49 63 4c 24 00 movslq 0x0(%r12),%rcx Code starting with the faulting instruction =========================================== 0: 48 89 42 08 mov %rax,0x8(%rdx) 4: 48 89 10 mov %rdx,(%rax) 7: 49 63 4c 24 00 movslq 0x0(%r12),%rcx RSP: 0018:ffffc90007bafd30 EFLAGS: 00010046 RAX: dead000000000122 RBX: ffff888fee970ec0 RCX: 0000000000000016 RDX: dead000000000100 RSI: fffffffc33d6ea99 RDI: 0000000000000190 RBP: ffffc90007bafdd8 R08: 0000000000000002 R09: 0000000000002c03 R10: ffffc90006d9fda8 R11: ffff888157ac1b20 R12: ffff8884e521a7c0 R13: 0000000000000000 R14: ffff888fee970ef0 R15: 00007effe7bf8e60 FS: 000000006d33f6c0(0000) GS:ffff888fee940000(0000) knlGS:000000007fd60000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000e0802528 CR3: 0000000193a4e000 CR4: 0000000000750ee0 PKRU: 55555554 ``` alt_core.c:4784 ```c /* * 
__schedule() ttwu() * prev_state = prev->state; if (p->on_rq && ...) * if (prev_state) goto out; * p->on_rq = 0; smp_acquire__after_ctrl_dep(); * p->state = TASK_WAKING * * Where __schedule() and ttwu() have matching control dependencies. * * After this, schedule() must not care about p->state any more. */ sched_task_deactivate(prev, rq); --> deactivate_task(prev, rq); deactivated = 1; if (prev->in_iowait) { atomic_inc(&rq->nr_iowait); delayacct_blkio_start(); } } ``` alt_core.c:1363 ```c /* * deactivate_task - remove a task from the runqueue. * * Context: rq->lock */ static inline void deactivate_task(struct task_struct *p, struct rq *rq) { p->on_rq = 0; --> dequeue_task(p, rq, DEQUEUE_SLEEP); cpufreq_update_util(rq, 0); } ``` alt_core.c:806 ```c static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) { #ifdef ALT_SCHED_DEBUG lockdep_assert_held(&rq->lock); /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", task_cpu(p), cpu_of(rq)); #endif --> __SCHED_DEQUEUE_TASK(p, rq, flags); --rq->nr_running; #ifdef CONFIG_SMP if (1 == rq->nr_running) cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); #endif sched_update_tick_dependency(rq); } ``` alt_core.c:782 (guessed by trace after this one) ```c /* * Add/Remove/Requeue task to/from the runqueue routines * Context: rq->lock */ #define __SCHED_DEQUEUE_TASK(p, rq, flags) \ sched_info_dequeue(rq, p); \ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ \ --> list_del(&p->sq_node); \ if (list_empty(&rq->queue.heads[p->sq_idx])) \ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ update_sched_preempt_mask(rq); ``` list.h:148 ```c /** * list_del - deletes entry from list. * @entry: the element to delete from the list. * Note: list_empty() on entry does not return true after this, the entry is * in an undefined state. 
*/ static inline void list_del(struct list_head *entry) { --> __list_del_entry(entry); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; } ``` What's odd about this one is that we're now hitting the same issue, but simply through deactivating a task rather than through **migrate_pending_tasks**. This new oops also happens regardless of whether or not I apply @torvic9's patches for enqueuing/dequeuing tasks.
issue