From 3b6054da68f9b0d5ed6a7ed0f42a79e61904352c Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 1 Oct 2012 22:21:12 +0300 Subject: usb hub: send clear_tt_buffer_complete events when canceling TT clear work There is a race condition in the USB hub code with regard to handling TT clear requests that can get the HCD driver in a deadlock. Usually when an TT clear request is scheduled it will be executed immediately: <7>[ 6.077583] usb 2-1.3: unlink qh1-0e01/f4d4db00 start 0 [1/2 us] <3>[ 6.078041] usb 2-1: clear tt buffer port 3, a3 ep2 t04048d82 <7>[ 6.078299] hub_tt_work:731 <7>[ 9.309089] usb 2-1.5: link qh1-0e01/f4d506c0 start 0 [1/2 us] <7>[ 9.324526] ehci_hcd 0000:00:1d.0: reused qh f4d4db00 schedule <7>[ 9.324539] usb 2-1.3: link qh1-0e01/f4d4db00 start 0 [1/2 us] <7>[ 9.341530] usb 1-1.1: link qh4-0e01/f397aec0 start 2 [1/2 us] <7>[ 10.116159] usb 2-1.3: unlink qh1-0e01/f4d4db00 start 0 [1/2 us] <3>[ 10.116459] usb 2-1: clear tt buffer port 3, a3 ep2 t04048d82 <7>[ 10.116537] hub_tt_work:731 However, if a suspend operation is triggered before hub_tt_work is scheduled, hub_quiesce will cancel the work without notifying the HCD driver: <3>[ 35.033941] usb 2-1: clear tt buffer port 3, a3 ep2 t04048d80 <5>[ 35.034022] sd 0:0:0:0: [sda] Stopping disk <7>[ 35.034039] hub 2-1:1.0: hub_suspend <7>[ 35.034067] usb 2-1: unlink qh256-0001/f3b1ab00 start 1 [1/0 us] <7>[ 35.035085] hub 1-0:1.0: hub_suspend <7>[ 35.035102] usb usb1: bus suspend, wakeup 0 <7>[ 35.035106] ehci_hcd 0000:00:1a.0: suspend root hub <7>[ 35.035298] hub 2-0:1.0: hub_suspend <7>[ 35.035313] usb usb2: bus suspend, wakeup 0 <7>[ 35.035315] ehci_hcd 0000:00:1d.0: suspend root hub <6>[ 35.250017] PM: suspend of devices complete after 216.979 msecs <6>[ 35.250822] PM: late suspend of devices complete after 0.799 msecs <7>[ 35.252343] ehci_hcd 0000:00:1d.0: wakeup: 1 <7>[ 35.262923] ehci_hcd 0000:00:1d.0: --> PCI D3hot <7>[ 35.263302] ehci_hcd 0000:00:1a.0: wakeup: 1 <7>[ 35.273912] ehci_hcd 0000:00:1a.0: --> PCI D3hot <6>[ 35.274254] PM: noirq suspend of devices complete after 23.442 msecs <6>[ 35.274975] ACPI: Preparing to enter system sleep state S3 <6>[ 35.292666] PM: Saving platform NVS memory <7>[ 35.295030] Disabling non-boot CPUs ... <6>[ 35.297351] CPU 1 is now offline <6>[ 35.300345] CPU 2 is now offline <6>[ 35.303929] CPU 3 is now offline <7>[ 35.303931] lockdep: fixing up alternatives. <6>[ 35.304825] Extended CMOS year: 2000 When the device will resume the EHCI driver will get stuck in ehci_endpoint_disable waiting for the tt_clearing flag to reset: <0>[ 47.610967] usb 2-1.3: **** DPM device timeout **** <7>[ 47.610972] f2f11c60 00000092 f2f11c0c c10624a5 00000003 f4c6e880 c1c8a4c0 c1c8a4c0 <7>[ 47.610983] 15c55698 0000000b f56b34c0 f2a45b70 f4c6e880 00000082 f2a4602c f2f11c30 <7>[ 47.610993] c10787f8 f4cac000 f2a45b70 00000000 f4cac010 f2f11c58 00000046 00000001 <7>[ 47.611004] Call Trace: <7>[ 47.611006] [] ? sched_clock_cpu+0xf5/0x160 <7>[ 47.611019] [] ? lock_release_holdtime.part.22+0x88/0xf0 <7>[ 47.611026] [] ? lock_timer_base.isra.35+0x26/0x50 <7>[ 47.611034] [] ? schedule_timeout+0x133/0x290 <7>[ 47.611044] [] schedule+0x1e/0x50 <7>[ 47.611051] [] schedule_timeout+0x138/0x290 <7>[ 47.611057] [] ? sched_clock_cpu+0xf5/0x160 <7>[ 47.611063] [] ? usleep_range+0x40/0x40 <7>[ 47.611070] [] schedule_timeout_uninterruptible+0x15/0x20 <7>[ 47.611077] [] ehci_endpoint_disable+0x64/0x160 <7>[ 47.611084] [] ? usb_hcd_flush_endpoint+0x10e/0x1d0 <7>[ 47.611092] [] ? sysfs_add_file+0x13/0x20 <7>[ 47.611100] [] usb_hcd_disable_endpoint+0x29/0x40 <7>[ 47.611107] [] usb_disable_endpoint+0x5c/0x80 <7>[ 47.611111] [] usb_disable_interface+0x37/0x50 <7>[ 47.611116] [] usb_reset_and_verify_device+0x4b0/0x640 <7>[ 47.611122] [] ? hub_port_status+0xb5/0x100 <7>[ 47.611129] [] usb_port_resume+0xd5/0x220 <7>[ 47.611136] [] generic_resume+0xf/0x30 <7>[ 47.611142] [] usb_resume+0x133/0x180 <7>[ 47.611147] [] ? usb_dev_thaw+0x10/0x10 <7>[ 47.611152] [] usb_dev_resume+0xd/0x10 <7>[ 47.611157] [] dpm_run_callback+0x40/0xb0 <7>[ 47.611164] [] ? pm_runtime_enable+0x43/0x70 <7>[ 47.611171] [] device_resume+0x1a6/0x2c0 <7>[ 47.611177] [] ? dpm_show_time+0xe0/0xe0 <7>[ 47.611183] [] async_resume+0x19/0x40 <7>[ 47.611189] [] async_run_entry_fn+0x64/0x160 <7>[ 47.611196] [] ? process_one_work+0x104/0x480 <7>[ 47.611203] [] ? process_one_work+0x10c/0x480 <7>[ 47.611209] [] process_one_work+0x180/0x480 <7>[ 47.611215] [] ? process_one_work+0x104/0x480 <7>[ 47.611220] [] ? async_schedule+0x10/0x10 <7>[ 47.611226] [] worker_thread+0x11c/0x2f0 <7>[ 47.611233] [] ? manage_workers.isra.27+0x1f0/0x1f0 <7>[ 47.611239] [] kthread+0x78/0x80 <7>[ 47.611244] [] ? timer_cpu_notify+0xd6/0x20d <7>[ 47.611253] [] ? __init_kthread_worker+0x60/0x60 <7>[ 47.611258] [] kernel_thread_helper+0x6/0xd <7>[ 47.611283] ------------[ cut here ]------------ This patch changes hub_quiesce behavior to flush the TT clear work instead of canceling it, to make sure that no TT clear request remains uncompleted before suspend. Signed-off-by: Octavian Purdila Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 64854d76f529..1181e917fec7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -739,13 +739,16 @@ static void hub_tt_work(struct work_struct *work) int limit = 100; spin_lock_irqsave (&hub->tt.lock, flags); - while (--limit && !list_empty (&hub->tt.clear_list)) { + while (!list_empty(&hub->tt.clear_list)) { struct list_head *next; struct usb_tt_clear *clear; struct usb_device *hdev = hub->hdev; const struct hc_driver *drv; int status; + if (!hub->quiescing && --limit < 0) + break; + next = hub->tt.clear_list.next; clear = list_entry (next, struct usb_tt_clear, clear_list); list_del (&clear->clear_list); @@ -1210,7 +1213,7 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) if (hub->has_indicators) cancel_delayed_work_sync(&hub->leds); if (hub->tt.hub) - cancel_work_sync(&hub->tt.clear_work); + flush_work_sync(&hub->tt.clear_work); } /* caller has locked the hub device */ -- cgit v1.2.1