Files
lammps/lib/gpu/neighbor_cpu_ptx.h

87 lines
2.6 KiB
C

/*
 * neighbor_cpu: PTX assembly text (.version 2.3, .target sm_20, 64-bit
 * addresses) held as one C string assembled from adjacent literals.
 * The text defines a single entry point, kernel_unpack, taking
 *   dev_nbor (u64), dev_ij (u64), inum (s32), t_per_atom (s32).
 *
 * What the instructions compute, as C-like pseudocode over 4-byte
 * signed elements accessed with ld.global / st.global:
 *
 *   offset = tid.x & (t_per_atom - 1);
 *   ii     = tid.x / t_per_atom + ctaid.x * (ntid.x / t_per_atom);
 *   if (ii >= inum) exit;
 *   count  = dev_nbor[inum + ii];
 *   start  = dev_nbor[2 * inum + ii];
 *   src    = dev_ij + start + offset;
 *   end    = dev_ij + start + count;
 *   dst    = dev_nbor + 2 * inum + ii + (t_per_atom - 1) * ii + offset;
 *   if (src < end)
 *     do { *dst = *src; dst += inum * t_per_atom; src += 1; }
 *     while (src != end);
 *
 * The mask (t_per_atom - 1) equals tid.x % t_per_atom only when
 * t_per_atom is a power of two.
 *
 * NOTE(review): the .loc directives and $Lt_/$LDW labels suggest this
 * text is compiler output -- presumably it should be regenerated from
 * the kernel source rather than edited by hand; confirm.
 * NOTE(review): src advances one element per iteration (not t_per_atom
 * elements) while each lane starts at its own offset; confirm against
 * the kernel source that this is the intended access pattern.
 */
const char * neighbor_cpu =
/* Module header: PTX version, target architecture, pointer width. */
" .version 2.3\n"
" .target sm_20\n"
" .address_size 64\n"
/* Entry point declaration and its four parameters. */
" .entry kernel_unpack (\n"
" .param .u64 __cudaparm_kernel_unpack_dev_nbor,\n"
" .param .u64 __cudaparm_kernel_unpack_dev_ij,\n"
" .param .s32 __cudaparm_kernel_unpack_inum,\n"
" .param .s32 __cudaparm_kernel_unpack_t_per_atom)\n"
" {\n"
/* Virtual register declarations (32-bit, 64-bit, predicate). */
" .reg .u32 %r<19>;\n"
" .reg .u64 %rd<33>;\n"
" .reg .pred %p<5>;\n"
/* .loc lines are PTX debugging directives (file index, line, column). */
" .loc 16 21 0\n"
"$LDWbegin_kernel_unpack:\n"
/* %r8 (ii) = tid.x / t_per_atom + ctaid.x * (ntid.x / t_per_atom). */
" ld.param.s32 %r1, [__cudaparm_kernel_unpack_t_per_atom];\n"
" cvt.s32.u32 %r2, %tid.x;\n"
" div.s32 %r3, %r2, %r1;\n"
" cvt.s32.u32 %r4, %ntid.x;\n"
" div.s32 %r5, %r4, %r1;\n"
" cvt.s32.u32 %r6, %ctaid.x;\n"
" mul.lo.s32 %r7, %r6, %r5;\n"
" add.s32 %r8, %r3, %r7;\n"
/* Threads with ii >= inum (signed compare) jump straight to exit. */
" ld.param.s32 %r9, [__cudaparm_kernel_unpack_inum];\n"
" setp.ge.s32 %p1, %r8, %r9;\n"
" @%p1 bra $Lt_0_2050;\n"
" .loc 16 30 0\n"
/* %rd6 = dev_nbor + 4*(ii + inum);  %rd8 = %rd6 + 4*inum. */
" cvt.s64.s32 %rd1, %r9;\n"
" ld.param.u64 %rd2, [__cudaparm_kernel_unpack_dev_nbor];\n"
" cvt.s64.s32 %rd3, %r8;\n"
" add.u64 %rd4, %rd3, %rd1;\n"
" mul.lo.u64 %rd5, %rd4, 4;\n"
" add.u64 %rd6, %rd2, %rd5;\n"
" mul.wide.s32 %rd7, %r9, 4;\n"
" add.u64 %rd8, %rd6, %rd7;\n"
/* %rd12 = dev_ij + 4 * (s32 loaded from [%rd8]). */
" ld.param.u64 %rd9, [__cudaparm_kernel_unpack_dev_ij];\n"
" ld.global.s32 %r10, [%rd8+0];\n"
" cvt.s64.s32 %rd10, %r10;\n"
" mul.wide.s32 %rd11, %r10, 4;\n"
" add.u64 %rd12, %rd9, %rd11;\n"
" .loc 16 31 0\n"
/* %rd15 = %rd12 + 4 * (s32 loaded from [%rd6]): the loop's end address. */
" ld.global.s32 %r11, [%rd6+0];\n"
" cvt.s64.s32 %rd13, %r11;\n"
" mul.wide.s32 %rd14, %r11, 4;\n"
" add.u64 %rd15, %rd12, %rd14;\n"
" .loc 16 33 0\n"
/*
 * %r13 (offset) = (t_per_atom - 1) & tid.x.
 * %rd18 (store address) = %rd8 + 4 * (offset + (t_per_atom - 1) * ii).
 */
" sub.s32 %r12, %r1, 1;\n"
" and.b32 %r13, %r12, %r2;\n"
" mul.lo.s32 %r14, %r12, %r8;\n"
" add.s32 %r15, %r13, %r14;\n"
" cvt.s64.s32 %rd16, %r15;\n"
" mul.wide.s32 %rd17, %r15, 4;\n"
" add.u64 %rd18, %rd8, %rd17;\n"
" .loc 16 34 0\n"
/*
 * %rd21 (load address) = %rd12 + 4 * offset.  The loop is skipped
 * entirely when %rd21 >= %rd15 (unsigned address compare).
 */
" cvt.s64.s32 %rd19, %r13;\n"
" mul.wide.s32 %rd20, %r13, 4;\n"
" add.u64 %rd21, %rd12, %rd20;\n"
" setp.ge.u64 %p2, %rd21, %rd15;\n"
" @%p2 bra $Lt_0_2562;\n"
/*
 * %rd22..%rd29: (%rd15 - %rd21 + 3) divided by 4 with the sign-fixup
 * shift sequence; no later instruction in this kernel reads %rd29.
 * %r16 = inum * t_per_atom is the per-iteration store stride (in elements).
 */
" sub.u64 %rd22, %rd15, %rd21;\n"
" add.u64 %rd23, %rd22, 3;\n"
" shr.s64 %rd24, %rd23, 63;\n"
" mov.s64 %rd25, 3;\n"
" and.b64 %rd26, %rd24, %rd25;\n"
" add.s64 %rd27, %rd26, %rd23;\n"
" shr.s64 %rd28, %rd27, 2;\n"
" mul.lo.s32 %r16, %r9, %r1;\n"
" mov.s64 %rd29, %rd28;\n"
/* Copy loop: one s32 per iteration from [%rd21] to [%rd18]. */
"$Lt_0_3074:\n"
" .loc 16 37 0\n"
" ld.global.s32 %r17, [%rd21+0];\n"
" st.global.s32 [%rd18+0], %r17;\n"
" .loc 16 38 0\n"
/* Advance store address by 4*%r16 bytes and load address by 4 bytes. */
" cvt.s64.s32 %rd30, %r16;\n"
" mul.wide.s32 %rd31, %r16, 4;\n"
" add.u64 %rd18, %rd18, %rd31;\n"
" add.u64 %rd21, %rd21, 4;\n"
/* Repeat until the load address equals the end address exactly. */
" setp.ne.u64 %p3, %rd21, %rd15;\n"
" @%p3 bra $Lt_0_3074;\n"
/* Common exit for both the bounds check and the empty-range check. */
"$Lt_0_2562:\n"
"$Lt_0_2050:\n"
" .loc 16 41 0\n"
" exit;\n"
"$LDWend_kernel_unpack:\n"
" }\n"
;