...
 
Commits (13)
  • Jason Gunthorpe's avatar
    RDMA/odp: Fix leaking the tgid for implicit ODP · 0f9826f4
    Jason Gunthorpe authored
    The tgid used to be part of ib_umem_free_notifier(), when it was reworked
    it got moved to release, but it should have been unconditional as all umem
    alloc paths get the tgid.
    
    As is, creating an implicit ODP will leak the tgid reference.
    
    Link: https://lore.kernel.org/r/20200304181607.GA22412@ziepe.ca
    Cc: stable@kernel.org
    Fixes: f25a546e ("RDMA/odp: Use mmu_interval_notifier_insert()")
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    0f9826f4
  • Weihang Li's avatar
    MAINTAINERS: Update maintainers for HISILICON ROCE DRIVER · d372abf3
    Weihang Li authored
    Add myself as a maintainer for HNS RoCE drivers, and update Xavier's
    e-mail address.
    
    Link: https://lore.kernel.org/r/1583575114-32194-1-git-send-email-liweihang@huawei.com
    Signed-off-by: Weihang Li <liweihang@huawei.com>
    Acked-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    d372abf3
  • Mark Zhang's avatar
    RDMA/mlx5: Fix the number of hwcounters of a dynamic counter · ec16b6bb
    Mark Zhang authored
    When we read the global counter and there's any dynamic counter allocated,
    the value of a hwcounter is the sum of the default counter and all dynamic
    counters. So the number of hwcounters of a dynamically allocated counter
    must be same as of the default counter, otherwise there will be read
    violations.
    
    This fixes the KASAN slab-out-of-bounds bug:
    
      BUG: KASAN: slab-out-of-bounds in rdma_counter_get_hwstat_value+0x36d/0x390 [ib_core]
      Read of size 8 at addr ffff8884192a5778 by task rdma/10138
    
      CPU: 7 PID: 10138 Comm: rdma Not tainted 5.5.0-for-upstream-dbg-2020-02-06_18-30-19-27 #1
      Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
      Call Trace:
       dump_stack+0xb7/0x10b
       print_address_description.constprop.4+0x1e2/0x400
       ? rdma_counter_get_hwstat_value+0x36d/0x390 [ib_core]
       __kasan_report+0x15c/0x1e0
       ? mlx5_ib_query_q_counters+0x13f/0x270 [mlx5_ib]
       ? rdma_counter_get_hwstat_value+0x36d/0x390 [ib_core]
       kasan_report+0xe/0x20
       rdma_counter_get_hwstat_value+0x36d/0x390 [ib_core]
       ? rdma_counter_query_stats+0xd0/0xd0 [ib_core]
       ? memcpy+0x34/0x50
       ? nla_put+0xe2/0x170
       nldev_stat_get_doit+0x9c7/0x14f0 [ib_core]
       ...
       do_syscall_64+0x95/0x490
       entry_SYSCALL_64_after_hwframe+0x49/0xbe
      RIP: 0033:0x7fcc457fe65a
      Code: bb 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 8b 05 fa f1 2b 00 45 89 c9 4c 63 d1 48 63 ff 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 f3 c3 0f 1f 40 00 41 55 41 54 4d 89 c5 55
      RSP: 002b:00007ffc0586f868 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
      RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fcc457fe65a
      RDX: 0000000000000020 RSI: 00000000013db920 RDI: 0000000000000003
      RBP: 00007ffc0586fa90 R08: 00007fcc45ac10e0 R09: 000000000000000c
      R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004089c0
      R13: 0000000000000000 R14: 00007ffc0586fab0 R15: 00000000013dc9a0
    
      Allocated by task 9700:
       save_stack+0x19/0x80
       __kasan_kmalloc.constprop.7+0xa0/0xd0
       mlx5_ib_counter_alloc_stats+0xd1/0x1d0 [mlx5_ib]
       rdma_counter_alloc+0x16d/0x3f0 [ib_core]
       rdma_counter_bind_qpn_alloc+0x216/0x4e0 [ib_core]
       nldev_stat_set_doit+0x8c2/0xb10 [ib_core]
       rdma_nl_rcv_msg+0x3d2/0x730 [ib_core]
       rdma_nl_rcv+0x2a8/0x400 [ib_core]
       netlink_unicast+0x448/0x620
       netlink_sendmsg+0x731/0xd10
       sock_sendmsg+0xb1/0xf0
       __sys_sendto+0x25d/0x2c0
       __x64_sys_sendto+0xdd/0x1b0
       do_syscall_64+0x95/0x490
       entry_SYSCALL_64_after_hwframe+0x49/0xbe
    
    Fixes: 18d422ce ("IB/mlx5: Add counter_alloc_stats() and counter_update_stats() support")
    Link: https://lore.kernel.org/r/20200305124052.196688-1-leon@kernel.org
    Signed-off-by: Mark Zhang <markz@mellanox.com>
    Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    ec16b6bb
  • Jason Gunthorpe's avatar
    RDMA/nl: Do not permit empty devices names during RDMA_NLDEV_CMD_NEWLINK/SET · 7aefa623
    Jason Gunthorpe authored
    Empty device names cannot be added to sysfs and crash with:
    
      kobject: (00000000f9de3792): attempted to be registered with empty name!
      WARNING: CPU: 1 PID: 10856 at lib/kobject.c:234 kobject_add_internal+0x7ac/0x9a0 lib/kobject.c:234
      Kernel panic - not syncing: panic_on_warn set ...
      CPU: 1 PID: 10856 Comm: syz-executor459 Not tainted 5.6.0-rc3-syzkaller #0
      Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
      Call Trace:
       __dump_stack lib/dump_stack.c:77 [inline]
       dump_stack+0x197/0x210 lib/dump_stack.c:118
       panic+0x2e3/0x75c kernel/panic.c:221
       __warn.cold+0x2f/0x3e kernel/panic.c:582
       report_bug+0x289/0x300 lib/bug.c:195
       fixup_bug arch/x86/kernel/traps.c:174 [inline]
       fixup_bug arch/x86/kernel/traps.c:169 [inline]
       do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:267
       do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:286
       invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027
      RIP: 0010:kobject_add_internal+0x7ac/0x9a0 lib/kobject.c:234
      Code: 7a ca ca f9 e9 f0 f8 ff ff 4c 89 f7 e8 cd ca ca f9 e9 95 f9 ff ff e8 13 25 8c f9 4c 89 e6 48 c7 c7 a0 08 1a 89 e8 a3 76 5c f9 <0f> 0b 41 bd ea ff ff ff e9 52 ff ff ff e8 f2 24 8c f9 0f 0b e8 eb
      RSP: 0018:ffffc90002006eb0 EFLAGS: 00010286
      RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
      RDX: 0000000000000000 RSI: ffffffff815eae46 RDI: fffff52000400dc8
      RBP: ffffc90002006f08 R08: ffff8880972ac500 R09: ffffed1015d26659
      R10: ffffed1015d26658 R11: ffff8880ae9332c7 R12: ffff888093034668
      R13: 0000000000000000 R14: ffff8880a69d7600 R15: 0000000000000001
       kobject_add_varg lib/kobject.c:390 [inline]
       kobject_add+0x150/0x1c0 lib/kobject.c:442
       device_add+0x3be/0x1d00 drivers/base/core.c:2412
       ib_register_device drivers/infiniband/core/device.c:1371 [inline]
       ib_register_device+0x93e/0xe40 drivers/infiniband/core/device.c:1343
       rxe_register_device+0x52e/0x655 drivers/infiniband/sw/rxe/rxe_verbs.c:1231
       rxe_add+0x122b/0x1661 drivers/infiniband/sw/rxe/rxe.c:302
       rxe_net_add+0x91/0xf0 drivers/infiniband/sw/rxe/rxe_net.c:539
       rxe_newlink+0x39/0x90 drivers/infiniband/sw/rxe/rxe.c:318
       nldev_newlink+0x28a/0x430 drivers/infiniband/core/nldev.c:1538
       rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline]
       rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline]
       rdma_nl_rcv+0x5d9/0x980 drivers/infiniband/core/netlink.c:259
       netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
       netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1329
       netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1918
       sock_sendmsg_nosec net/socket.c:652 [inline]
       sock_sendmsg+0xd7/0x130 net/socket.c:672
       ____sys_sendmsg+0x753/0x880 net/socket.c:2343
       ___sys_sendmsg+0x100/0x170 net/socket.c:2397
       __sys_sendmsg+0x105/0x1d0 net/socket.c:2430
       __do_sys_sendmsg net/socket.c:2439 [inline]
       __se_sys_sendmsg net/socket.c:2437 [inline]
       __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
       do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
       entry_SYSCALL_64_after_hwframe+0x49/0xbe
    
    Prevent empty names when checking the name provided from userspace during
    newlink and rename.
    
    Fixes: 3856ec4b ("RDMA/core: Add RDMA_NLDEV_CMD_NEWLINK/DELLINK support")
    Fixes: 05d940d3 ("RDMA/nldev: Allow IB device rename through RDMA netlink")
    Cc: stable@kernel.org
    Link: https://lore.kernel.org/r/20200309191648.GA30852@ziepe.ca
    Reported-and-tested-by: syzbot+da615ac67d4dbea32cbc@syzkaller.appspotmail.com
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    7aefa623
  • Jason Gunthorpe's avatar
    RDMA/core: Fix missing error check on dev_set_name() · f2f2b3bb
    Jason Gunthorpe authored
    If name memory allocation fails the name will be left empty and
    device_add_one() will crash:
    
      kobject: (0000000004952746): attempted to be registered with empty name!
      WARNING: CPU: 0 PID: 329 at lib/kobject.c:234 kobject_add_internal+0x7ac/0x9a0 lib/kobject.c:234
      Kernel panic - not syncing: panic_on_warn set ...
      CPU: 0 PID: 329 Comm: syz-executor.5 Not tainted 5.6.0-rc2-syzkaller #0
      Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
      Call Trace:
       __dump_stack lib/dump_stack.c:77 [inline]
       dump_stack+0x197/0x210 lib/dump_stack.c:118
       panic+0x2e3/0x75c kernel/panic.c:221
       __warn.cold+0x2f/0x3e kernel/panic.c:582
       report_bug+0x289/0x300 lib/bug.c:195
       fixup_bug arch/x86/kernel/traps.c:174 [inline]
       fixup_bug arch/x86/kernel/traps.c:169 [inline]
       do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:267
       do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:286
       invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027
      RIP: 0010:kobject_add_internal+0x7ac/0x9a0 lib/kobject.c:234
      Code: 1a 98 ca f9 e9 f0 f8 ff ff 4c 89 f7 e8 6d 98 ca f9 e9 95 f9 ff ff e8 c3 f0 8b f9 4c 89 e6 48 c7 c7 a0 0e 1a 89 e8 e3 41 5c f9 <0f> 0b 41 bd ea ff ff ff e9 52 ff ff ff e8 a2 f0 8b f9 0f 0b e8 9b
      RSP: 0018:ffffc90005b27908 EFLAGS: 00010286
      RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
      RDX: 0000000000040000 RSI: ffffffff815eae46 RDI: fffff52000b64f13
      RBP: ffffc90005b27960 R08: ffff88805aeba480 R09: ffffed1015d06659
      R10: ffffed1015d06658 R11: ffff8880ae8332c7 R12: ffff8880a37fd000
      R13: 0000000000000000 R14: ffff888096691780 R15: 0000000000000001
       kobject_add_varg lib/kobject.c:390 [inline]
       kobject_add+0x150/0x1c0 lib/kobject.c:442
       device_add+0x3be/0x1d00 drivers/base/core.c:2412
       add_one_compat_dev drivers/infiniband/core/device.c:901 [inline]
       add_one_compat_dev+0x46a/0x7e0 drivers/infiniband/core/device.c:857
       rdma_dev_init_net+0x2eb/0x490 drivers/infiniband/core/device.c:1120
       ops_init+0xb3/0x420 net/core/net_namespace.c:137
       setup_net+0x2d5/0x8b0 net/core/net_namespace.c:327
       copy_net_ns+0x29e/0x5a0 net/core/net_namespace.c:468
       create_new_namespaces+0x403/0xb50 kernel/nsproxy.c:108
       unshare_nsproxy_namespaces+0xc2/0x200 kernel/nsproxy.c:229
       ksys_unshare+0x444/0x980 kernel/fork.c:2955
       __do_sys_unshare kernel/fork.c:3023 [inline]
       __se_sys_unshare kernel/fork.c:3021 [inline]
       __x64_sys_unshare+0x31/0x40 kernel/fork.c:3021
       do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
       entry_SYSCALL_64_after_hwframe+0x49/0xbe
    
    Link: https://lore.kernel.org/r/20200309193200.GA10633@ziepe.ca
    Cc: stable@kernel.org
    Fixes: 4e0f7b90 ("RDMA/core: Implement compat device/sysfs tree in net namespace")
    Reported-by: syzbot+ab4dae63f7d310641ded@syzkaller.appspotmail.com
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    f2f2b3bb
  • Jason Gunthorpe's avatar
    RDMA/mad: Do not crash if the rdma device does not have a umad interface · 5bdfa854
    Jason Gunthorpe authored
    Non-IB devices do not have a umad interface and the client_data will be
    left set to NULL. In this case calling get_nl_info() will try to kref a
    NULL cdev causing a crash:
    
      general protection fault, probably for non-canonical address 0xdffffc00000000ba: 0000 [#1] PREEMPT SMP KASAN
      KASAN: null-ptr-deref in range [0x00000000000005d0-0x00000000000005d7]
      CPU: 0 PID: 20851 Comm: syz-executor.0 Not tainted 5.6.0-rc2-syzkaller #0
      Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
      RIP: 0010:kobject_get+0x35/0x150 lib/kobject.c:640
      Code: 53 e8 3f b0 8b f9 4d 85 e4 0f 84 a2 00 00 00 e8 31 b0 8b f9 49 8d 7c 24 3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 48 89 fa 83 e2 07 38 d0 7f 08 84 c0 0f 85 eb 00 00 00
      RSP: 0018:ffffc9000946f1a0 EFLAGS: 00010203
      RAX: dffffc0000000000 RBX: ffffffff85bdbbb0 RCX: ffffc9000bf22000
      RDX: 00000000000000ba RSI: ffffffff87e9d78f RDI: 00000000000005d4
      RBP: ffffc9000946f1b8 R08: ffff8880581a6440 R09: ffff8880581a6cd0
      R10: fffffbfff154b838 R11: ffffffff8aa5c1c7 R12: 0000000000000598
      R13: 0000000000000000 R14: ffffc9000946f278 R15: ffff88805cb0c4d0
      FS:  00007faa9e8af700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
      CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
      CR2: 0000001b30121000 CR3: 000000004515d000 CR4: 00000000001406f0
      DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
      DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
      Call Trace:
       get_device+0x25/0x40 drivers/base/core.c:2574
       __ib_get_client_nl_info+0x205/0x2e0 drivers/infiniband/core/device.c:1861
       ib_get_client_nl_info+0x35/0x180 drivers/infiniband/core/device.c:1881
       nldev_get_chardev+0x575/0xac0 drivers/infiniband/core/nldev.c:1621
       rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline]
       rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline]
       rdma_nl_rcv+0x5d9/0x980 drivers/infiniband/core/netlink.c:259
       netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
       netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1329
       netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1918
       sock_sendmsg_nosec net/socket.c:652 [inline]
       sock_sendmsg+0xd7/0x130 net/socket.c:672
       ____sys_sendmsg+0x753/0x880 net/socket.c:2343
       ___sys_sendmsg+0x100/0x170 net/socket.c:2397
       __sys_sendmsg+0x105/0x1d0 net/socket.c:2430
       __do_sys_sendmsg net/socket.c:2439 [inline]
       __se_sys_sendmsg net/socket.c:2437 [inline]
       __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
       do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
       entry_SYSCALL_64_after_hwframe+0x49/0xbe
    
    Cc: stable@kernel.org
    Fixes: 8f71bb00 ("RDMA: Report available cdevs through RDMA_NLDEV_CMD_GET_CHARDEV")
    Link: https://lore.kernel.org/r/20200310075339.238090-1-leon@kernel.org
    Reported-by: syzbot+46fe08363dbba223dec5@syzkaller.appspotmail.com
    Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    5bdfa854
  • Kaike Wan's avatar
    IB/rdmavt: Free kernel completion queue when done · 941224e0
    Kaike Wan authored
    When a kernel ULP requests the rdmavt to create a completion queue, it
    allocated the queue and set cq->kqueue to point to it. However, when the
    completion queue is destroyed, cq->queue is freed instead, leading to a
    memory leak:
    
    https://lore.kernel.org/r/215235485.15264050.1583334487658.JavaMail.zimbra@redhat.com
    
     unreferenced object 0xffffc90006639000 (size 12288):
     comm "kworker/u128:0", pid 8, jiffies 4295777598 (age 589.085s)
        hex dump (first 32 bytes):
          4d 00 00 00 4d 00 00 00 00 c0 08 ac 8b 88 ff ff  M...M...........
          00 00 00 00 80 00 00 00 00 00 00 00 10 00 00 00  ................
        backtrace:
          [<0000000035a3d625>] __vmalloc_node_range+0x361/0x720
          [<000000002942ce4f>] __vmalloc_node.constprop.30+0x63/0xb0
          [<00000000f228f784>] rvt_create_cq+0x98a/0xd80 [rdmavt]
          [<00000000b84aec66>] __ib_alloc_cq_user+0x281/0x1260 [ib_core]
          [<00000000ef3764be>] nvme_rdma_cm_handler+0xdb7/0x1b80 [nvme_rdma]
          [<00000000936b401c>] cma_cm_event_handler+0xb7/0x550 [rdma_cm]
          [<00000000d9c40b7b>] addr_handler+0x195/0x310 [rdma_cm]
          [<00000000c7398a03>] process_one_req+0xdd/0x600 [ib_core]
          [<000000004d29675b>] process_one_work+0x920/0x1740
          [<00000000efedcdb5>] worker_thread+0x87/0xb40
          [<000000005688b340>] kthread+0x327/0x3f0
          [<0000000043a168d6>] ret_from_fork+0x3a/0x50
    
    This patch fixes the issue by freeing cq->kqueue instead.
    
    Fixes: 239b0e52 ("IB/hfi1: Move rvt_cq_wc struct into uapi directory")
    Link: https://lore.kernel.org/r/20200313123957.14343.43879.stgit@awfm-01.aw.intel.com
    Cc: <stable@vger.kernel.org> # 5.4.x
    Reported-by: Yi Zhang <yi.zhang@redhat.com>
    Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
    Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
    Signed-off-by: Kaike Wan <kaike.wan@intel.com>
    Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    941224e0
  • Mike Marciniszyn's avatar
    IB/hfi1: Ensure pq is not left on waitlist · 9a293d1e
    Mike Marciniszyn authored
    The following warning can occur when a pq is left on the dmawait list and
    the pq is then freed:
    
      WARNING: CPU: 47 PID: 3546 at lib/list_debug.c:29 __list_add+0x65/0xc0
      list_add corruption. next->prev should be prev (ffff939228da1880), but was ffff939cabb52230. (next=ffff939cabb52230).
      Modules linked in: mmfs26(OE) mmfslinux(OE) tracedev(OE) 8021q garp mrp ib_isert iscsi_target_mod target_core_mod crc_t10dif crct10dif_generic opa_vnic rpcrdma ib_iser libiscsi scsi_transport_iscsi ib_ipoib(OE) bridge stp llc iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crct10dif_pclmul crct10dif_common crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd ast ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm pcspkr joydev drm_panel_orientation_quirks i2c_i801 mei_me lpc_ich mei wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_power_meter acpi_pad hfi1(OE) rdmavt(OE) rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core binfmt_misc numatools(OE) xpmem(OE) ip_tables
      nfsv3 nfs_acl nfs lockd grace sunrpc fscache igb ahci libahci i2c_algo_bit dca libata ptp pps_core crc32c_intel [last unloaded: i2c_algo_bit]
      CPU: 47 PID: 3546 Comm: wrf.exe Kdump: loaded Tainted: G W OE ------------ 3.10.0-957.41.1.el7.x86_64 #1
      Hardware name: HPE.COM HPE SGI 8600-XA730i Gen10/X11DPT-SB-SG007, BIOS SBED1229 01/22/2019
      Call Trace:
      [<ffffffff91f65ac0>] dump_stack+0x19/0x1b
      [<ffffffff91898b78>] __warn+0xd8/0x100
      [<ffffffff91898bff>] warn_slowpath_fmt+0x5f/0x80
      [<ffffffff91a1dabe>] ? ___slab_alloc+0x24e/0x4f0
      [<ffffffff91b97025>] __list_add+0x65/0xc0
      [<ffffffffc03926a5>] defer_packet_queue+0x145/0x1a0 [hfi1]
      [<ffffffffc0372987>] sdma_check_progress+0x67/0xa0 [hfi1]
      [<ffffffffc03779d2>] sdma_send_txlist+0x432/0x550 [hfi1]
      [<ffffffff91a20009>] ? kmem_cache_alloc+0x179/0x1f0
      [<ffffffffc0392973>] ? user_sdma_send_pkts+0xc3/0x1990 [hfi1]
      [<ffffffffc0393e3a>] user_sdma_send_pkts+0x158a/0x1990 [hfi1]
      [<ffffffff918ab65e>] ? try_to_del_timer_sync+0x5e/0x90
      [<ffffffff91a3fe1a>] ? __check_object_size+0x1ca/0x250
      [<ffffffffc0395546>] hfi1_user_sdma_process_request+0xd66/0x1280 [hfi1]
      [<ffffffffc034e0da>] hfi1_aio_write+0xca/0x120 [hfi1]
      [<ffffffff91a4245b>] do_sync_readv_writev+0x7b/0xd0
      [<ffffffff91a4409e>] do_readv_writev+0xce/0x260
      [<ffffffff918df69f>] ? pick_next_task_fair+0x5f/0x1b0
      [<ffffffff918db535>] ? sched_clock_cpu+0x85/0xc0
      [<ffffffff91f6b16a>] ? __schedule+0x13a/0x860
      [<ffffffff91a442c5>] vfs_writev+0x35/0x60
      [<ffffffff91a4447f>] SyS_writev+0x7f/0x110
      [<ffffffff91f78ddb>] system_call_fastpath+0x22/0x27
    
    The issue happens when wait_event_interruptible_timeout() returns a value
    <= 0.
    
    In that case, the pq is left on the list. The code continues sending
    packets and potentially can complete the current request with the pq still
    on the dmawait list provided no descriptor shortage is seen.
    
    If the pq is torn down in that state, the sdma interrupt handler could
    find the now freed pq on the list with list corruption or memory
    corruption resulting.
    
    Fix by adding a flush routine to ensure that the pq is never on a list
    after processing a request.
    
    A follow-up patch series will address issues with seqlock surfaced in:
    https://lore.kernel.org/r/20200320003129.GP20941@ziepe.ca
    
    The seqlock use for sdma will then be converted to a spin lock since the
    list_empty() doesn't need the protection afforded by the sequence lock
    currently in use.
    
    Fixes: a0d40693 ("staging/rdma/hfi1: Add page lock limit check for SDMA requests")
    Link: https://lore.kernel.org/r/20200320200200.23203.37777.stgit@awfm-01.aw.intel.com
    Reviewed-by: Kaike Wan <kaike.wan@intel.com>
    Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
    Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    9a293d1e
  • Leon Romanovsky's avatar
    MAINTAINERS: Clean RXE section and add Zhu as RXE maintainer · 1fa70778
    Leon Romanovsky authored
    Zhu Yanjun contributed many patches to RXE and expressed genuine interest
    in improving RXE even more. Let's add him as a maintainer.
    
    Link: https://lore.kernel.org/r/20200312083658.29603-1-leon@kernel.org
    Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
    Acked-by: Moni Shoua <monis@mellanox.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    1fa70778
  • Mike Marciniszyn's avatar
    RDMA/core: Ensure security pkey modify is not lost · 2d47fbac
    Mike Marciniszyn authored
    The following modify sequence (loosely based on ipoib) will lose a pkey
    modification:
    
    - Modify (pkey index, port)
    - Modify (new pkey index, NO port)
    
    After the first modify, the qp_pps list will have saved the pkey and the
    unit on the main list.
    
    During the second modify, get_new_pps() will fetch the port from qp_pps
    and read the new pkey index from qp_attr->pkey_index.  The state will
    still be zero, or IB_PORT_PKEY_NOT_VALID. Because of the invalid state,
    the new values will never replace the one in the qp pps list, losing the
    new pkey.
    
    This happens because the following if statements will never correct the
    state because the first term will be false. If the code had been executed,
    it would incorrectly overwrite valid values.
    
      if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT))
    	  new_pps->main.state = IB_PORT_PKEY_VALID;
    
      if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) {
    	  new_pps->main.port_num = qp_pps->main.port_num;
    	  new_pps->main.pkey_index = qp_pps->main.pkey_index;
    	  if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
    		  new_pps->main.state = IB_PORT_PKEY_VALID;
      }
    
    Fix by joining the two if statements with an or test to see if qp_pps is
    non-NULL and in the correct state.
    
    Fixes: 1dd01788 ("RDMA/core: Fix protection fault in get_pkey_idx_qp_list")
    Link: https://lore.kernel.org/r/20200313124704.14982.55907.stgit@awfm-01.aw.intel.com
    Reviewed-by: Kaike Wan <kaike.wan@intel.com>
    Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
    Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    2d47fbac
  • Leon Romanovsky's avatar
    RDMA/mlx5: Fix access to wrong pointer while performing flush due to error · 950bf4f1
    Leon Romanovsky authored
    The main difference between send and receive SW completions is related to
    separate treatment of WQ queue. For receive completions, the initial index
    to be flushed is stored in "tail", while for send completions, it is in
    deleted "last_poll".
    
      CPU: 54 PID: 53405 Comm: kworker/u161:0 Kdump: loaded Tainted: G           OE    --------- -t - 4.18.0-147.el8.ppc64le #1
      Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core]
      NIP:  c000003c7c00a000 LR: c00800000e586af4 CTR: c000003c7c00a000
      REGS: c0000036cc9db940 TRAP: 0400   Tainted: G           OE    --------- -t -  (4.18.0-147.el8.ppc64le)
      MSR:  9000000010009033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 24004488  XER: 20040000
      CFAR: c00800000e586af0 IRQMASK: 0
      GPR00: c00800000e586ab4 c0000036cc9dbbc0 c00800000e5f1a00 c0000037d8433800
      GPR04: c000003895a26800 c0000037293f2000 0000000000000201 0000000000000011
      GPR08: c000003895a26c80 c000003c7c00a000 0000000000000000 c00800000ed30438
      GPR12: c000003c7c00a000 c000003fff684b80 c00000000017c388 c00000396ec4be40
      GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
      GPR20: c00000000151e498 0000000000000010 c000003895a26848 0000000000000010
      GPR24: 0000000000000010 0000000000010000 c000003895a26800 0000000000000000
      GPR28: 0000000000000010 c0000037d8433800 c000003895a26c80 c000003895a26800
      NIP [c000003c7c00a000] 0xc000003c7c00a000
      LR [c00800000e586af4] __ib_process_cq+0xec/0x1b0 [ib_core]
      Call Trace:
      [c0000036cc9dbbc0] [c00800000e586ab4] __ib_process_cq+0xac/0x1b0 [ib_core] (unreliable)
      [c0000036cc9dbc40] [c00800000e586c88] ib_cq_poll_work+0x40/0xb0 [ib_core]
      [c0000036cc9dbc70] [c000000000171f44] process_one_work+0x2f4/0x5c0
      [c0000036cc9dbd10] [c000000000172a0c] worker_thread+0xcc/0x760
      [c0000036cc9dbdc0] [c00000000017c52c] kthread+0x1ac/0x1c0
      [c0000036cc9dbe30] [c00000000000b75c] ret_from_kernel_thread+0x5c/0x80
    
    Fixes: 8e3b6883 ("RDMA/mlx5: Delete unreachable handle_atomic code by simplifying SW completion")
    Link: https://lore.kernel.org/r/20200318091640.44069-1-leon@kernel.org
    Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    950bf4f1
  • Maor Gottlieb's avatar
    RDMA/mlx5: Block delay drop to unprivileged users · ba80013f
    Maor Gottlieb authored
    It has been discovered that this feature can globally block the RX port,
    so it should be allowed for highly privileged users only.
    
    Fixes: 03404e8a ("IB/mlx5: Add support to dropless RQ")
    Link: https://lore.kernel.org/r/20200322124906.1173790-1-leon@kernel.org
    Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
    Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
    ba80013f
  • Linus Torvalds's avatar
    Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma · 9420e8ad
    Linus Torvalds authored
    Pull rdma fixes from Jason Gunthorpe:
     "A small set of late-rc patches, mostly fixes for various crashers,
      some syzkaller fixes and a mlx5 HW limitation:
    
       - Several MAINTAINERS updates
    
       - Memory leak regression in ODP
    
       - Several fixes for syzkaller related crashes. Google recently taught
         syzkaller to create the software RDMA devices
    
       - Crash fixes for HFI1
    
       - Several fixes for mlx5 crashes
    
       - Prevent unprivileged access to an unsafe mlx5 HW resource"
    
    * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
      RDMA/mlx5: Block delay drop to unprivileged users
      RDMA/mlx5: Fix access to wrong pointer while performing flush due to error
      RDMA/core: Ensure security pkey modify is not lost
      MAINTAINERS: Clean RXE section and add Zhu as RXE maintainer
      IB/hfi1: Ensure pq is not left on waitlist
      IB/rdmavt: Free kernel completion queue when done
      RDMA/mad: Do not crash if the rdma device does not have a umad interface
      RDMA/core: Fix missing error check on dev_set_name()
      RDMA/nl: Do not permit empty devices names during RDMA_NLDEV_CMD_NEWLINK/SET
      RDMA/mlx5: Fix the number of hwcounters of a dynamic counter
      MAINTAINERS: Update maintainers for HISILICON ROCE DRIVER
      RDMA/odp: Fix leaking the tgid for implicit ODP
    9420e8ad
......@@ -7579,7 +7579,8 @@ F: Documentation/admin-guide/perf/hisi-pmu.rst
HISILICON ROCE DRIVER
M: Lijun Ou <oulijun@huawei.com>
M: Wei Hu(Xavier) <xavier.huwei@huawei.com>
M: Wei Hu(Xavier) <huwei87@hisilicon.com>
M: Weihang Li <liweihang@huawei.com>
L: linux-rdma@vger.kernel.org
S: Maintained
F: drivers/infiniband/hw/hns/
......@@ -15421,11 +15422,9 @@ F: drivers/infiniband/sw/siw/
F: include/uapi/rdma/siw-abi.h
SOFT-ROCE DRIVER (rxe)
M: Moni Shoua <monis@mellanox.com>
M: Zhu Yanjun <yanjunz@mellanox.com>
L: linux-rdma@vger.kernel.org
S: Supported
W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
Q: http://patchwork.kernel.org/project/linux-rdma/list/
F: drivers/infiniband/sw/rxe/
F: include/uapi/rdma/rdma_user_rxe.h
......
......@@ -896,7 +896,9 @@ static int add_one_compat_dev(struct ib_device *device,
cdev->dev.parent = device->dev.parent;
rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
cdev->dev.release = compatdev_release;
dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
if (ret)
goto add_err;
ret = device_add(&cdev->dev);
if (ret)
......
......@@ -918,6 +918,10 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
if (strlen(name) == 0) {
err = -EINVAL;
goto done;
}
err = ib_device_rename(device, name);
goto done;
}
......@@ -1514,7 +1518,7 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
sizeof(ibdev_name));
if (strchr(ibdev_name, '%'))
if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
return -EINVAL;
nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
......
......@@ -349,16 +349,11 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
else if (qp_pps)
new_pps->main.pkey_index = qp_pps->main.pkey_index;
if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT))
if (((qp_attr_mask & IB_QP_PKEY_INDEX) &&
(qp_attr_mask & IB_QP_PORT)) ||
(qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID))
new_pps->main.state = IB_PORT_PKEY_VALID;
if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) {
new_pps->main.port_num = qp_pps->main.port_num;
new_pps->main.pkey_index = qp_pps->main.pkey_index;
if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
new_pps->main.state = IB_PORT_PKEY_VALID;
}
if (qp_attr_mask & IB_QP_ALT_PATH) {
new_pps->alt.port_num = qp_attr->alt_port_num;
new_pps->alt.pkey_index = qp_attr->alt_pkey_index;
......
......@@ -275,8 +275,8 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
mmu_interval_notifier_remove(&umem_odp->notifier);
kvfree(umem_odp->dma_list);
kvfree(umem_odp->page_list);
put_pid(umem_odp->tgid);
}
put_pid(umem_odp->tgid);
kfree(umem_odp);
}
EXPORT_SYMBOL(ib_umem_odp_release);
......
......@@ -1129,17 +1129,30 @@ static const struct file_operations umad_sm_fops = {
.llseek = no_llseek,
};
/*
 * get_port - validate a port number and return its umad port structure.
 * @ibdev:    RDMA device the port belongs to
 * @umad_dev: umad client data for @ibdev; may be NULL
 * @port:     port number to look up
 *
 * Returns a pointer into umad_dev->ports[] on success, or an ERR_PTR():
 *   -EOPNOTSUPP if @umad_dev is NULL or rdma_cap_ib_mad() is false for @port,
 *   -EINVAL     if rdma_is_port_valid() rejects @port.
 * Callers must test the result with IS_ERR() before dereferencing it.
 */
static struct ib_umad_port *get_port(struct ib_device *ibdev,
struct ib_umad_device *umad_dev,
unsigned int port)
{
/* No umad client data: bail out before umad_dev is dereferenced below. */
if (!umad_dev)
return ERR_PTR(-EOPNOTSUPP);
if (!rdma_is_port_valid(ibdev, port))
return ERR_PTR(-EINVAL);
if (!rdma_cap_ib_mad(ibdev, port))
return ERR_PTR(-EOPNOTSUPP);
/* ports[] is indexed relative to the device's first port number. */
return &umad_dev->ports[port - rdma_start_port(ibdev)];
}
static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data,
struct ib_client_nl_info *res)
{
struct ib_umad_device *umad_dev = client_data;
struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
if (!rdma_is_port_valid(ibdev, res->port))
return -EINVAL;
if (IS_ERR(port))
return PTR_ERR(port);
res->abi = IB_USER_MAD_ABI_VERSION;
res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].dev;
res->cdev = &port->dev;
return 0;
}
......@@ -1154,15 +1167,13 @@ MODULE_ALIAS_RDMA_CLIENT("umad");
static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data,
struct ib_client_nl_info *res)
{
struct ib_umad_device *umad_dev =
ib_get_client_data(ibdev, &umad_client);
struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
if (!rdma_is_port_valid(ibdev, res->port))
return -EINVAL;
if (IS_ERR(port))
return PTR_ERR(port);
res->abi = IB_USER_MAD_ABI_VERSION;
res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].sm_dev;
res->cdev = &port->sm_dev;
return 0;
}
......
......@@ -141,6 +141,7 @@ static int defer_packet_queue(
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
if (list_empty(&pq->busy.list)) {
pq->busy.lock = &sde->waitlock;
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
......@@ -155,6 +156,7 @@ static void activate_packet_queue(struct iowait *wait, int reason)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
pq->busy.lock = NULL;
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
wake_up(&wait->wait_dma);
};
......@@ -256,6 +258,21 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
return ret;
}
/*
 * flush_pq_iowait - unlink a packet queue's iowait entry from any wait list.
 * @pq: packet queue whose pq->busy entry should be removed
 *
 * Does nothing when pq->busy.lock is NULL. Otherwise takes that seqlock
 * for writing with interrupts saved, removes pq->busy.list from the list it
 * is on (if it is on one) and clears pq->busy.lock.
 */
static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
{
unsigned long flags;
/*
 * Snapshot the lock pointer: pq->busy.lock may be set to NULL inside the
 * critical section below, and the unlock must use the same lock that was
 * taken.
 */
seqlock_t *lock = pq->busy.lock;
if (!lock)
return;
write_seqlock_irqsave(lock, flags);
/* Only unlink if the entry is still queued. */
if (!list_empty(&pq->busy.list)) {
list_del_init(&pq->busy.list);
pq->busy.lock = NULL;
}
write_sequnlock_irqrestore(lock, flags);
}
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
......@@ -281,6 +298,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
kfree(pq->reqs);
kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
flush_pq_iowait(pq);
kfree(pq);
} else {
spin_unlock(&fd->pq_rcu_lock);
......@@ -587,11 +605,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
if (ret < 0) {
if (ret != -EBUSY)
goto free_req;
wait_event_interruptible_timeout(
if (wait_event_interruptible_timeout(
pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE),
pq->state == SDMA_PKT_Q_ACTIVE,
msecs_to_jiffies(
SDMA_IOWAIT_TIMEOUT));
SDMA_IOWAIT_TIMEOUT)) <= 0)
flush_pq_iowait(pq);
}
}
*count += idx;
......
......@@ -330,6 +330,22 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
dump_cqe(dev, cqe);
}
/*
 * handle_atomics - advance the send queue's last_poll marker past @head.
 * @qp: queue pair whose send queue (qp->sq) is updated
 * @cqe64: completion entry; not used by this function
 * @tail: starting position; masked with (wqe_cnt - 1) to get an index
 * @head: index at which the walk stops
 *
 * Follows the qp->sq.w_list[].next chain from @tail until the masked index
 * equals @head, then stores the 'next' link of that entry in
 * qp->sq.last_poll. The walk only ends once @head is reached.
 */
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	u16 pos = tail & (qp->sq.wqe_cnt - 1);

	while (pos != head) {
		tail = qp->sq.w_list[pos].next;
		pos = tail & (qp->sq.wqe_cnt - 1);
	}

	/* Step one entry beyond head: that is where the next poll resumes. */
	qp->sq.last_poll = qp->sq.w_list[pos].next;
}
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
......@@ -368,7 +384,7 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
}
static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
int *npolled, int is_send)
int *npolled, bool is_send)
{
struct mlx5_ib_wq *wq;
unsigned int cur;
......@@ -383,10 +399,16 @@ static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
return;
for (i = 0; i < cur && np < num_entries; i++) {
wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
unsigned int idx;
idx = (is_send) ? wq->last_poll : wq->tail;
idx &= (wq->wqe_cnt - 1);
wc->wr_id = wq->wrid[idx];
wc->status = IB_WC_WR_FLUSH_ERR;
wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
wq->tail++;
if (is_send)
wq->last_poll = wq->w_list[idx].next;
np++;
wc->qp = &qp->ibqp;
wc++;
......@@ -473,6 +495,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
idx = wqe_ctr & (wq->wqe_cnt - 1);
handle_good_req(wc, cqe64, wq, idx);
handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
wc->wr_id = wq->wrid[idx];
wq->tail = wq->wqe_head[idx] + 1;
wc->status = IB_WC_SUCCESS;
......
......@@ -5722,9 +5722,10 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
const struct mlx5_ib_counters *cnts =
get_counters(dev, counter->port - 1);
/* Q counters are in the beginning of all counters */
return rdma_alloc_hw_stats_struct(cnts->names,
cnts->num_q_counters,
cnts->num_q_counters +
cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
......
......@@ -288,6 +288,7 @@ struct mlx5_ib_wq {
unsigned head;
unsigned tail;
u16 cur_post;
u16 last_poll;
void *cur_edge;
};
......
......@@ -3775,6 +3775,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->sq.cur_post = 0;
if (qp->sq.wqe_cnt)
qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
qp->sq.last_poll = 0;
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
}
......@@ -6204,6 +6205,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
if (!capable(CAP_SYS_RAWIO) &&
init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP)
return ERR_PTR(-EPERM);
dev = to_mdev(pd->device);
switch (init_attr->wq_type) {
case IB_WQT_RQ:
......
......@@ -327,7 +327,7 @@ void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
if (cq->ip)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
vfree(cq->queue);
vfree(cq->kqueue);
}
/**
......