/*
 * PTX assembly text for the GPU entry point `kernel_unpack`, stored as a
 * single NUL-terminated C string built from adjacent string literals (one
 * literal per PTX source line, each ending in "\n").
 *
 * The listing is compiler output (note the `.loc` debug directives), so the
 * literal contents must not be edited by hand: every byte inside the quotes
 * is part of the program text handed to the PTX consumer.
 *
 * Kernel parameters, as declared in the `.entry` directive:
 *   dev_nbor   (.u64)  address used as the base for the loads and stores
 *   dev_ij     (.u64)  address used as the base for the copy source
 *   inum       (.s32)  upper bound for the per-thread index %r8
 *   t_per_atom (.s32)  divisor/mask applied to the thread index
 *
 * The comments between literals describe only what the instructions in the
 * listing do at the register level.
 */
const char * neighbor_cpu =
    /* Module header: PTX ISA 2.3, sm_20 target, 64-bit addressing. */
    " .version 2.3\n"
    " .target sm_20\n"
    " .address_size 64\n"

    /* Entry point signature. */
    " .entry kernel_unpack (\n"
    " .param .u64 __cudaparm_kernel_unpack_dev_nbor,\n"
    " .param .u64 __cudaparm_kernel_unpack_dev_ij,\n"
    " .param .s32 __cudaparm_kernel_unpack_inum,\n"
    " .param .s32 __cudaparm_kernel_unpack_t_per_atom)\n"
    " {\n"

    /* Virtual register declarations. */
    " .reg .u32 %r<19>;\n"
    " .reg .u64 %rd<33>;\n"
    " .reg .pred %p<5>;\n"
    " .loc 16 21 0\n"
    "$LDWbegin_kernel_unpack:\n"

    /*
     * %r8 = tid.x / t_per_atom + ctaid.x * (ntid.x / t_per_atom).
     * Threads with %r8 >= inum branch straight to the exit label.
     */
    " ld.param.s32 %r1, [__cudaparm_kernel_unpack_t_per_atom];\n"
    " cvt.s32.u32 %r2, %tid.x;\n"
    " div.s32 %r3, %r2, %r1;\n"
    " cvt.s32.u32 %r4, %ntid.x;\n"
    " div.s32 %r5, %r4, %r1;\n"
    " cvt.s32.u32 %r6, %ctaid.x;\n"
    " mul.lo.s32 %r7, %r6, %r5;\n"
    " add.s32 %r8, %r3, %r7;\n"
    " ld.param.s32 %r9, [__cudaparm_kernel_unpack_inum];\n"
    " setp.ge.s32 %p1, %r8, %r9;\n"
    " @%p1 bra $Lt_0_2050;\n"

    /*
     * %rd6  = dev_nbor + 4 * (%r8 + inum)
     * %rd8  = %rd6 + 4 * inum
     * %r10  = 32-bit word loaded from [%rd8]
     * %rd12 = dev_ij + 4 * %r10
     */
    " .loc 16 30 0\n"
    " cvt.s64.s32 %rd1, %r9;\n"
    " ld.param.u64 %rd2, [__cudaparm_kernel_unpack_dev_nbor];\n"
    " cvt.s64.s32 %rd3, %r8;\n"
    " add.u64 %rd4, %rd3, %rd1;\n"
    " mul.lo.u64 %rd5, %rd4, 4;\n"
    " add.u64 %rd6, %rd2, %rd5;\n"
    " mul.wide.s32 %rd7, %r9, 4;\n"
    " add.u64 %rd8, %rd6, %rd7;\n"
    " ld.param.u64 %rd9, [__cudaparm_kernel_unpack_dev_ij];\n"
    " ld.global.s32 %r10, [%rd8+0];\n"
    " cvt.s64.s32 %rd10, %r10;\n"
    " mul.wide.s32 %rd11, %r10, 4;\n"
    " add.u64 %rd12, %rd9, %rd11;\n"

    /*
     * %r11  = 32-bit word loaded from [%rd6]
     * %rd15 = %rd12 + 4 * %r11   (address at which the copy loop stops)
     */
    " .loc 16 31 0\n"
    " ld.global.s32 %r11, [%rd6+0];\n"
    " cvt.s64.s32 %rd13, %r11;\n"
    " mul.wide.s32 %rd14, %r11, 4;\n"
    " add.u64 %rd15, %rd12, %rd14;\n"

    /*
     * %r13  = (t_per_atom - 1) & tid.x
     * %rd18 = %rd8 + 4 * (%r13 + (t_per_atom - 1) * %r8)   (store address)
     */
    " .loc 16 33 0\n"
    " sub.s32 %r12, %r1, 1;\n"
    " and.b32 %r13, %r12, %r2;\n"
    " mul.lo.s32 %r14, %r12, %r8;\n"
    " add.s32 %r15, %r13, %r14;\n"
    " cvt.s64.s32 %rd16, %r15;\n"
    " mul.wide.s32 %rd17, %r15, 4;\n"
    " add.u64 %rd18, %rd8, %rd17;\n"

    /*
     * %rd21 = %rd12 + 4 * %r13   (load address).
     * The loop is skipped entirely when %rd21 >= %rd15 (unsigned compare).
     */
    " .loc 16 34 0\n"
    " cvt.s64.s32 %rd19, %r13;\n"
    " mul.wide.s32 %rd20, %r13, 4;\n"
    " add.u64 %rd21, %rd12, %rd20;\n"
    " setp.ge.u64 %p2, %rd21, %rd15;\n"
    " @%p2 bra $Lt_0_2562;\n"

    /*
     * %rd29 = (%rd15 - %rd21 + 3) / 4, computed as a signed division that
     * rounds toward zero (sign mask & 3 added before the arithmetic shift).
     * %rd29 is not read again anywhere in this listing.
     * %r16  = inum * t_per_atom.
     */
    " sub.u64 %rd22, %rd15, %rd21;\n"
    " add.u64 %rd23, %rd22, 3;\n"
    " shr.s64 %rd24, %rd23, 63;\n"
    " mov.s64 %rd25, 3;\n"
    " and.b64 %rd26, %rd24, %rd25;\n"
    " add.s64 %rd27, %rd26, %rd23;\n"
    " shr.s64 %rd28, %rd27, 2;\n"
    " mul.lo.s32 %r16, %r9, %r1;\n"
    " mov.s64 %rd29, %rd28;\n"

    /*
     * Copy loop: move one 32-bit word from [%rd21] to [%rd18], then advance
     * the store address by 4 * %r16 bytes and the load address by 4 bytes.
     * Repeats until the load address equals %rd15.
     */
    "$Lt_0_3074:\n"
    " .loc 16 37 0\n"
    " ld.global.s32 %r17, [%rd21+0];\n"
    " st.global.s32 [%rd18+0], %r17;\n"
    " .loc 16 38 0\n"
    " cvt.s64.s32 %rd30, %r16;\n"
    " mul.wide.s32 %rd31, %r16, 4;\n"
    " add.u64 %rd18, %rd18, %rd31;\n"
    " add.u64 %rd21, %rd21, 4;\n"
    " setp.ne.u64 %p3, %rd21, %rd15;\n"
    " @%p3 bra $Lt_0_3074;\n"

    /* Common exit for both the bounds check and the empty-range check. */
    "$Lt_0_2562:\n"
    "$Lt_0_2050:\n"
    " .loc 16 41 0\n"
    " exit;\n"
    "$LDWend_kernel_unpack:\n"
    " }\n"
    ;