From b63acf68437bffb59f91694179e0621fa65dcfad Mon Sep 17 00:00:00 2001 From: Rodolfo Leite Date: Tue, 24 Oct 2017 11:11:10 -0200 Subject: [PATCH 1/6] Added the UFM files (doc/src - lib/gpu - src) --- doc/src/Eqs/pair_ufm.jpg | Bin 0 -> 17830 bytes doc/src/Eqs/pair_ufm.tex | 14 ++ doc/src/pair_ufm.txt | 135 ++++++++++++ lib/gpu/Nvidia.makefile | 18 +- lib/gpu/README | 1 + lib/gpu/lal_ufm.cpp | 172 ++++++++++++++++ lib/gpu/lal_ufm.cu | 188 +++++++++++++++++ lib/gpu/lal_ufm.h | 86 ++++++++ src/GPU/Install.sh | 2 + src/GPU/pair_ufm_gpu.cpp | 240 ++++++++++++++++++++++ src/GPU/pair_ufm_gpu.h | 65 ++++++ src/OPT/Install.sh | 2 + src/OPT/pair_ufm_opt.cpp | 197 ++++++++++++++++++ src/OPT/pair_ufm_opt.h | 45 ++++ src/USER-OMP/pair_ufm_omp.cpp | 159 ++++++++++++++ src/USER-OMP/pair_ufm_omp.h | 50 +++++ src/pair_ufm.cpp | 376 ++++++++++++++++++++++++++++++++++ src/pair_ufm.h | 76 +++++++ 18 files changed, 1824 insertions(+), 2 deletions(-) create mode 100644 doc/src/Eqs/pair_ufm.jpg create mode 100644 doc/src/Eqs/pair_ufm.tex create mode 100644 doc/src/pair_ufm.txt create mode 100644 lib/gpu/lal_ufm.cpp create mode 100644 lib/gpu/lal_ufm.cu create mode 100644 lib/gpu/lal_ufm.h create mode 100644 src/GPU/pair_ufm_gpu.cpp create mode 100644 src/GPU/pair_ufm_gpu.h create mode 100644 src/OPT/pair_ufm_opt.cpp create mode 100644 src/OPT/pair_ufm_opt.h create mode 100644 src/USER-OMP/pair_ufm_omp.cpp create mode 100644 src/USER-OMP/pair_ufm_omp.h create mode 100644 src/pair_ufm.cpp create mode 100644 src/pair_ufm.h diff --git a/doc/src/Eqs/pair_ufm.jpg b/doc/src/Eqs/pair_ufm.jpg new file mode 100644 index 0000000000000000000000000000000000000000..40273da68063274cd6d6496d38f93ff97121df91 GIT binary patch literal 17830 zcmeFYV{m89+bta1_AgE*wr$(CZQD*Jn2DXq#I|kQw(U3fAJ2KJ&iD7z*;RX2<6771 zt9RGl-PNlj739R>VQ^r8fPmm7B}9~ffPhcF(?6jgzMp@Ac0z!FU`j28g%u=)g$Wg$ z?9D7~Oo4zTB2&^JRZv%OhOWj7F|`Oml!WYqTtFnCdC>&9mZ6{%6pY|P$k|LB!PU^v zk*o8f31KS1k#LQAdh?g7>4%APE2F__a?gR{uOB<^Ho33(j}AX4*|*-0GBR3@d_aMU 
zf=N)w`ka6Yq2~H~Htk}D|5!kDVhR8OtK@=8;Mx6+Ku6n0RG1j|UpfY5!ClauldSUR z|I#O$ik3bA0Rqp%8C1 z=x2;V!@G&U`~U)j2`7;Q3gC$t#6DREn-3QfSkW0qg;F%jKOaAE_MkM+=bxA9~cJ-D4{&nOpBU znO&s}B0l`CAX3soq@V0l#=#7eyc4ywQ<7J7nzs2E*_`#T@`|`$Ww{5>*b#}dpDveA zaPbuYs}@1DHUJmU(lJ{#$-AT$(KHeS;g9VRa+ksjrrl4UFxE)dUIu1pxhnozL_{ok zL=~YJAFQPV+4h3){Yo}7PBa6;n*j?TV8ETj2MQ=g>^{NOiRAF*3j6)uldn1gBAN@r zGOcS1B{Q>X6G-9!p52WJ7Nmd#o(fF29gIc@flUZm4*8OcWCP4Cz_|>I5P;)=h6gFT zP16ij8I-Zj){K}PKroG{7i`D?Ge`*4E36U$Q7Vw0Kwtu*DI_zDeH-$gXFv%P6^KGe zL>lOn3$H+=0+J&9mZx=$dCcO1+=}2T7@wB7?gM5QLf^~2^X7o~jzb^_BqVkmfngNIfb33+6e+fg%vcNYLqIB# zs*u1^a5b?_0`V5&C8)>H0t;dgZ7<@cC&aM%SIRHeU&Jz;#Sn85Eio<8Cj)TCoyMga ztmV*)z+;1AMyS=;%RV+g8gaZ~yurNzYw`F4I}R?Hz>^3^!O^{ByJ|a84sDL`&1yQB zm5A&?Lwl;%p8F#wXS(c1h-+N2w6=<1(2H4ce6X0I4YDHsUr=s_0Z%w{&Kn<FYXD@-;FPbg0)C`=mKs|A#7_@Uh1EZa1~Y~}PT zx?>bXl+A$1fY3leR8~}<6pYlO)Q;3|DRil$WUpf=-JHo{in-{Mp%cVo{i8oqh7*w) zb(V4;ncU8NKc-aORO6KC6zmi_tKL;Z^L9&<3Tsu1BoBU5i7jZjCR&A^5-d>vwLheq zbj=*g(q?#iULUfmPOs6bUZrXPwZgX|ImJ2MJSBe6Jx!b6vY2MoVMSqWw9Z_@nk}^C zvW8)4veujS8Bj2)#u|xX8Z;fOPEAjZPZhE5QY%^f)fnjZ%dOHa_>mbu2|<}c&%8}8 zW3sxk*e<{>&MxcHm^Yt5Ilih%xk~ryXJ?>a$fwu``G+8=DQKf$qhMB#vhd%$=pCs( zsy=Tq4w7QUq#3({wZZC|SjMg9-se)b$B7SFy~{f? 
zKD6HI9`*5T@MrPjxZh7KPJiutEIT>`_5?M9Lt@@@T5>yPK5{>BFlQQPK5;%ZTeLXn z{MFHGK5f3$Dbt125Gxm1L|)WpcV@3y`_UwSiEvrAq0NhgXde#}FDDN^#W3YGb)>hd z2d(F-*SE>GS=O%YTg~Ukr|sLia%w}@mdbl>ebl!4R0|lO@2^_F3hk=tTJ9nPLb!%s z#<+IL+Z5XAEfzo;Mp(yqrWq!*FrEG_+AJy5a-y22PGIl`at{VJAUU*k725 zK?U_QBu(9-d{}3Oj0yQ5+DnbAe7(j=tP4n62bTKD=Cbzrt;BA^Qer2l0oq zn8#qG%!`bnnRu$>VsUw(P1Sjhx}ln*dc%RmLHX2)?u1r{KYTs8o9voaR)eSY+mdHh znWnr}PW|VS&fR*_dOR*MTm63NQN`HVThCi~tIuyAN53brp;&MK{&&jf8t0!2#kv`t znAb3NXz7@#7;%`6(RS}9H_9vWs2O|g?igzrS1Unl#Ot>PWn+u!C@)FpNu9R9YdB5J zjhHrH&BiZrXGM!e-tI|mQf`mQFKMh-vl|VjOCFqE?|z}$qQXU52gRfEZJupc=Z+i7 zUFyE}JDXK+U~dGMPn%mBMf$2BWa4&!2aZ)8d#$dnz)b}YIJ}>D-Vf5_^oVh9`1agAJc|BGS0)EP zR!f7#{xFs~Hb-vF&*3esyRJUDXh$25l`@-|BV;XbX*(=V%E~f)JJJ; z>BuxMFEzGbj1&z^Khe>*u6qt#jW(3&sna*qta_K5E3y_`df}#Xr`vpJ_I@CE+njiVy>jP;^1iP2mUN#I-t6lU zABo@OmGkF$G}%x-p9?Z~nv%*X?aQv{G@y@^h^~cBh`Efs|(%%hE9O0=8wJXCz|6cH* zFE4%(uqGBHmK|Rl6X#3yTz=bf2zw{}wiHnXYK*=2d{b*Lwx!ki=IhUu$izsr8^Nhu z0ThJ5!AB7A6b_Utme()CF@~Y z6POo$A4cF0bwEXV;T}LLwy8Grk(fQNLh_`)AF>|Mj|+TV;caA8ZfUfu&8l^745TGq z@^&L>nY#tCvqf<-dfDuFP5%nJlMg~Awh9mSJ&VnAT?`35Y;EkExjcA@|7F4To&Kkoo|y1oCN9>z#2T^+gu?btri83? zEOZRSd@zKBggj0rW?V`lV*i1E=Xi-NTwENu=;__v-Raz!>Fk}%=@~gWIq4ag=$V*k zzb$B;J?&f!J!tKmN&fBRzkWnaosFF=9b7E!?Fj$zYiMNe>cUG*{14IpT>oyTsfXqN zVX|}nPgvgp(*ILK&q&8W|3ALJp*;VTaw%APnA&KFSlXJ}Ie&BTv2bwm{LB9Ts`)>R z{|l-4e~^qU?Ef42ziR%2*(}@hkozd}Xq<=Hd6ls4Gi{RxSf41Tr4QEu%Z^)!9rFd;vxhCYk_Sp%5w#)He`3T z)e-`apIMSial4D_7;*dQF-k+hvCSHNoCPj10dzhjV{mxJppY8^7ho1U?^zCIbcyYz^Z*gU0ks|`? zzx!TVI%-!ilvyFY9zq;^WSHI~;{9F7=VsR6m%?4eK^1}`1c-X&;F3Og!B?&P^$kT| zp1iyB&ojZ5X?Q?q8_?dwUiD!#ea%_dG6Y-$)A(C_xBJV>JKy$`kQ@%(*8`}RBRuI7 zw5>>P3)Hmjh7M^CD5sE~n7Xp=FDo|@ad=sm^FMKq7bhrIFMlzK?~j|F^|#|0{N2G0 zeYYWvYvGvGgX1;~XwumsKUTAPUR|vnckSFdm{L}o!o9@(=w0@of>OD-tjYxIHql&? 
zpjfg6`YO`~#|qKD-{f$jK;<@^-_+kU(TdHSl<)K`ukw(quJdBL3+X7+g$16E-?)M3|YY4F!&aq zs*JRA07ItwM+5|PFc9$J0y=i0|J315FS&E2mh*?&@O7VR+Kenc8IQa#EPF4(I5#71 zEivGn>0d>k*wgp?5PVpyMQA7Q4CzE|HC_ufUiJ~37HE2^qeMP<;rSlTxH%7U1A{6+j?vKNrT;H|+w?E!-|2`<09r~1eIe0LbHRehcgjx@GO25*etkgzgW zZ{J^}Cwf4|k(XwGm@#C-)ZG!=Ti?0MeT;!c+Tx9;0D%&fkGHu-^RQKM4}C6~g3mSh zP*b)ZSej#)F-|^uP?XcOpY4PtPAGxoC`jactBG|`T&3ZBy5iTlTOxwD667MgA7G@L z+#zew=E#`}D%p{Z*%QI|rinGmyq}BkHa^~@5_DESoyX5Lp*nDJ=;5UNKtN7_1qOFA8!7U! z)%v4BBO)n@93n)<=OGF@I?BRXN2~5(=K@71vL)8$*s;UCDcd9FT-Lqm&QsiJ9)dEb zcWm}YOAp=D2ckxUDOy7qd^Cwr75rdtG4(>D?triT9d!G|Xl5UIj%rYs(QjOu!3nL` zw|N7#tGN~;XKj3Ah6+DZgd;5EwcglGZ_){Wrb9f*ED#P*&+^ow+B$JUt zP`Hx4INascvI(eeWkeFfx_IZfFj)f(ssl3(GP^N^QP`w{#_9Yc)_j02BK>k3zPQ9X zcqZ&am@r_#Sg%y2eoYKo!g-3N^q9&^?)h^_|K=jRATf)kd!^1?@stgP|B@@>u4HQO zzI2Ym{SuEZi$AuC|2O2uya!lTM`T@|Wl;b_3#JMzzP}uT)HEPydv9Z>te`w^@n4|J@*WNzOqoG1g(3XBxB=?yKjrJ!_LtGEgrq@V66)d1T zE87GYF0tzUQ%1K+T^8UaPOYwt|7OHTlk!6>CpfB+SMT$c0`Hj~jB!T2rnw3i?o=TI zfjw0KyA}n-QzA5_g^p(80f#zQbPtwUsAe~s-hpZZjKAs)qw0xOY`?=B3Y9c>Rp1oI z3tG9)k?#SRt5ws;Lq6K<93N%(l0V0IVbqxc`+>Lf;}snXbC8hcoX^PlY+6bZbMfQc za4#h|eBcN2VzbzG$Hr`2d(&0Z z$upmDr4}jSKGOri1DO8aSp%Ap&iPQcWC~ip zqNjiQ8*KhcyuQ|r{&Na?caZVHoT#_7utGBmh1%!%hY~QNv$}7W$}sJ9DuG&()%T_^ zYVcZ)e0^*;*wzG#{G!gmqNf)4H+`2cBpnt(8~7)ivac+mLdET2Mv??%q4LH!7eerC@0tvxwy020|%0YsTK; z$t)d|PNe^(LgQhdkn*An=&=VMm;$0WP=HHXuJq zqrZf|oD~5-nH_ipb7J4@-iHu^B*6E@5WbfJJ1mWyu{2L04xy7 zVIJKgS0~Xw+wECJf6{AGF*z(@_0Kk6#Q{LLg-uDJgdEk8B6lB8 z6?V>8qC?*sbmy?*x_YfLvWa3%BQX=D9R`Vi2q3p%DcoY+#s_Zw*do z5rf-p6J1^y+z8rZTpYzY9?ygGNZz04iE%A>JIGG^mDQ5NO$d}+So22D$_O%)N@9w$ z6UNo~s%_aPLk@D6nkBJ#$1%BHK=3%5aZ7>;1m^PD!(#B`d$M&CTr9cebk{?LNOCDv zyD`~3xioq`L)c_u>d#aQ81w?hZk}>i7hnl=C(dHElQW_u_^f!S7(srR~g(d;Q5~4aoHPdd5A#S{`3Tos+*zVK^CcOO*Wyr%hmnugB zbucxyFaDBpFqL0*_!(}HeOB8BF*$7T8!UGY5B+vF{4NL=E;iG~+h8()D|pb~fITCg z@uVJNcEbJw+Sc=EraX*$X{&r)VF=X z)pt)F901*r$w@mbc4IDxW$N6YaU-_Wh7_IGh8r<20=7+|=MD}a)U;F5{Y(UzhId|^ zHR@{TCPDQ=Eta{zf?!t`Dqx#QR#M?J+7o)PH!XRkUZP$J!_N&jgK;{s$CWap?S(Wt2F93s 
z(=(;TiB8zuubWQnpeDJxj>h0tV9<8bIMslcoX+4)XjaqmmG*ID%{T`yZC-aP(!NHR zLcWgnj+}W&%98YGU0lYn3y*8?bK`{*cWo=Zq|QuvbG5%$NeYL?QBn!%?Ri2GF0Npu zR#&im2Ms@q4}Pvcc%oF z%{H73{JG|rY523Q!VvkSdCw4gO=L&=N++bJtBBP`_*m5JN-3{?E#2g#%|t8x3aPfN znc+bX`NXvuo7|+$KotE-bh}&(9`EvKL5(bZxyG>5u3epN&ZM#3fvZjTc@}1^FD9V6 z{nlK^SR}1xJVM!KLWld1$(vQX&V^}?rO|*un1a#Xn0wK;C(Y(y7W1XSWOUuP(=d_N z)tNOP>Y-iuc1$m|B8eZT^z0^o?G*Ks#3Qhus0Y$Fnbj;V zp0vy76M`4UxdcuW6ZHZEzYxRymYYVS19C=lrh2y`KPqnQY0DqeTuw*ao7-LEggtvi zx%m{U+JLwBkcI|8+x58%|NfB1#zw#sicRvuW5uW6@pLJane&Jo=kJZ+J_r{Wt0^7| zpnb3Rsl74{f!KWmbPmN?Xwx8Ei+9V&OT40%^L-$>4S(~X|~7)^m5nl?MZmy<%_K9Uk|Wm2&hC?IUw(@ z#85%Qz{|d`R4P|jJ-~WsdChQ^;IUt=oT%tTyu?slcIyH1i&x+-wA(Ds@N)hPpqq4N zvKjRBqr%4>3^pycak1X!Aec@H39XodJ{@tuIal2&i8#x53&@d+K5%vzA`SPWJZGzk z8HttW@!2)++T%Ev@V@Y-Xh;2f^P5G_ruOd3xGrMi3xn}{7Ip z`<796L^OvDJ)$gp?v{6@w0g)vZUh6vq8SFd6&WoZspw%c)qiOf=i(#@PUB&Z8t;AY z@9OET5cYaMGq7G?QDX4d3)Tgsn<;VK z`sQ~w9Ii(FLPYfYE{>5IT;~PNr%FAJV!el@v_dP^8hfV)Apr4n-GjFNL?d2Id}-xT zriu%;xuvC`@bI*o&sp)Mb(`KcxnwQZ?ru*+9+vZ>A&|b&6BLNuwO!Qj;@rHRhTdpp zhx*pIBrbb-eBOG@s><>EB=0hO<#o0U%_Rs)eq&<9R}_z#jZjtN-tqFfyIGfh#?>kt zfo~a*1`_fseD{gC-k42$mjypR2=5(_RibAHnm?D=eJ;T5a$Us@8O<;GH#*+u68d;r z^M5*ugOzs-rM;fVT@qw7Se+Z^rQ!kbcM+jG&2XG*6Ot8t9ru1RAdw>JkgC1BHnQt|%3LLOQnzRTT~}8EiKF zr{5dh;v+bF{GWi-EMFZuq`ib8@d5s_X9(Ni(M(*)}^Egj*{BT)Jl3{qa~5n(AE~z?|4*#u0wU?rAT}u!@p@w#O15Hmtfsiew3Q z+skb_Ow`=kVk-U~p3^94`1Fp9}m6tdp|O)m-S z%!D}@*V%#(4f<_bF>l~|3(+>04-+Op&d(%w?Cy}hdGb>2_!28bX+{`|jQMZjgSu2j zJ6Cdw%uLP7fmB6zU985SH*yFSl69FT=$(WP!PP#WEx zL1EQHa}C8{LqqWOL0?*p&!bDTwwnzGZMv|;`^kbvvk#)3h^p?Wiq^^<8^PcZ^Sgm6 zA6F4CfQ1Dw@vbv)qXfW4`F7|7Y5*!G>yv7*nb%8z zHkR|A%uhHyxm6|hFg%{KD*#A7=IcH7 zR$0>>G%*VcvDqywFvB&N9#!=~@`jopSX>Ad+4?kM#TS1}dq`FJV9$#%hYsBX0#si& z#?l$0=g;lW=)346(6!xX4%U(2CdVo_Rp-BNnEK#}2YjapDe@4m<`q!p$z1to(=*RW z;raJaIvwueO#&`?d@=q%+ah_QJd6)|D-YHM#08f+Da?H8@VDX|trlPBSkA%}?fn83 z%b^m@>2?-(D385=_5Ebm(Hc{GQL^XDqJAmZC|1b=eJ7mgij7Cu z?|rhh=1Xg}YBJQ7iu61SdD}eW+StB%^r^RN3rj!OiRw!RIGW8jy&u|W>EWy&{ZV2> 
zWTpVy@~y|j(Jk4%Ru5L}P2*Qp+p~A-co~(#KTt;BR$f!+JeCZWKqC$+Z&zJd$SYQB z3rt0;R?+AWE(|@7(2P#ot2_8aV0CTjl;z2r@mq^tvbMO)qQU=meApKoSXkLE6UJbuByXm2YXpCf=7aiEskONEZPn9 zT@s3igsX`EUG}k2r6$>0ZIBEYrb%-A>f>LhVo!ew0`{omI-5-;Hqr_>#JU1xKVN#8 z2VRE7xxCqiS%uZVN8IU8-O2KZf^(n#%gr~C?+lBW;+Z9TWBP}8i<|&a`UzP@Rgi|s+Uv)In#pf7d}&J|Ubn9815oB|auVh4{ z%Y`n?O|d)gvldgy4SHg*%}upFRwmb$<|b!QSTsf6a(83MN@){(#eK?PYh&)6x-W?q zVr|Jzj6#-r(+L2#S+Mv=R!1v{DbEB;sbxrc_rbaejG$zKoG3P`ZI`Qr;dWgAYcZa#YhH69CfhP1zeU8GW>W}Ki9xO=h zV^#0Y!;*d?elmjU#^Au+dpru-L;$q;ISj>GrPAZGAqzNIE#h?Yr^o8 z2u$@7DO>_W?c92=+uSi3A4GH?5B%@dug)?TUBBfUaDw8O{9+t~*9{Bl+8R=2z~$f( z>FhtsZWg;D&xy6m%?f>ZSmkm?KaVR@ef(Ge3~Ec{`__E)%iUYE`dYDeYFUM+rwS+g zVfKr5B3w_{i(lP;NtTp0gIFDf>|wKB;a%`YGF&vU;=417pZ5z4*Gq&US|#B!h!Um5 zK{aj-LP&;(TSoe#84jHDsqL}zh8(uNP~ddlN3|=GoQXT<4_0F{97^_JIWWC}TDq!w zqtMT4?w;+BXQx2!!nqgyXe9|CSP7s^BRXB-6niLI46!HRqw(R&u(L;BRW{QitoGjo z4)$X=>Gw9?1Y0bddp>6LTC=OCD@Om$fRvFTnTirCAcgWU5)!fE zgtzdo3ft0T$`rOcoY5Abu-wxy?#H5E|L#O;lDirRgSTdMug3(jX*2BndiuqA2(EF$ z#!4@F#vk1D+1DJs0OSYX_&G4Vr~4}Iw_7$>@dCQjScx_5ily&!CJfhT>8WmWx9NC* zPIUNr+y!_mDCrwi0;DW5lq#tr?$Pahk#|J532rd@ySihY+`b4Z*(4=SXLqV8^{eDk z9_UUYdr*!GmlmG2`w*@efAl*pU%%2H^Sq(M9WON;LQ0%ciFuFoN#7oCZzZWIinWc( z+gncEcpMO)ju!kFda;;r>4XxIwDn>Y)pIVMIgiQfgWT>KJF401$LH|6W0<%&3D!*x zyYnaLkJWIEx%;h_38biP!@Un4wcoD^?ZPEM zgGBfphNbd_Sw-`zV1qG*N3198W#992p>^PL^CzSan={9v=Ca{yo4eWC-C{eEuk z?V>-a{fD?<{9A$d!qV%JlmM<>*zIJa#@ zE5CqJ5pn;pWYoY#op}vCGioVEU;zcfUKk;`XnPtV1uJsXRF1dTB^1BAyi+BRq~lEU z>y_hsN~&V^<>~`-GrAAp_lu}S{#u6)v!f!pQ9+;MmmEXf=L@DB>D|I4sTclAytBIf zj-U&bzLp{~fumG4E)VkEQA}S#Qt)z%BRU&?TeMGQ`aTZd-A{hMRic0}e1EKCvpubC zw{|9DjLm01$RyR5V=DsEj9LW1!?_s%qUX@R$tWNTW4?djcu@WAY(ift;|aZ*2qu1t-Du~hSAiIT z*5LXDJZV804tD#>U|$~1(~OhJ!{N-Td{nJ{UDaC)<0W!M@38HxlH0o-6E%vob}x$C zBQD_31jfwTdQ3-0=T~ttc^YT(hu=H-8dn{E-CVh+-P#Q7`I7n@)7t5g%~7qdpksOS z=J}bvWBGLt-(r_~8pXliziEZ-De9tS>;)KHFA1UgD^q6Ao_sT!P8T@dvBvrz-q8o> z19cOCDZ|AiM4 zRgG$*M<;Gt&5X{~NHm|JA?JkmPboDeZ8monTn=kSZrr0x|`_!@EYWkMr5mi2q 
zKez#@{e_*bf1_=;-L0nS*!=5B_U-~#!lHk;4*4Od;Tg;HqXfs*KH2tU^7t;V%wx16 zc`gpnFrPxaIiA>@)mXGTl@ybhTg(TO(g@ndc(cCJbGyz_K6f)gp+;MDU8(JY-C3j5jO<<|=Ioi- z64N3Y_7^(t`WNxW$Fu?(;lJV~ztAxcYLk=az0%Op56GO1KReII(%BqTSs&~1d}iY_ zj_<1wK&Tkp9r{EFER=yjd`1(Cuho+YluUT3LFO)P8?h55W*wjx+U zM2mWt28(AkUoNQ1EPDdN&9+2Jq4>NZn>@N^p96WzCcP^um`z#XkAHvb&oa%pRR>Eh zz>bK>`_Z-d`gW$<5thxA>&U=={3sJzOYoO#CS$J1!~_WZv7LoC);m*su>gHrSM6_` ziuFt|o10M*eWEJcj7-SN4!N$D;ds6ev5O7Not3cl$~3!^WY-0kk_1~N0Op&(NviR- zn6hW&qtIWJotwwtm9@$9CW6_)EFjd3D`K?|l|*hH=Qp)JF^QYuq1+`Ol^j&MUBWUwopy=-9(e->n5Qu;VZ!H?HpOoYH{uG}c#SxUA6lJFz? zC_I3J91{aOsDapCz<*1ZDFp_v1$J&XkH z&mdH{I5&%=@I_LEvm@wNKCj#wkhE9}w*%CrA;YXnbd?tzLDp+~J8d3p5Y3vytn4Uq zV5cMk8ygQZdCnpNJy2M#PlFN!A#HLy+l_M)b~T~0>Bu3q7%>yH21fHiW4CNVA38mz zc*r_}?+HCv9BxoHXa#}`5hx1tjUoXtF4})(sifB^y7_-|9>=CJTBIT)=q<;8U=pV{ z`uVO=EYzb!B{_P%31+SiTpMjkVy&fV&B%gEmZnDoVV@ilzpM`@dPuC^zE(u(C!%yc zaXXtC*IOYy+O9-IBUkterRFnW?p&6}F8khME_d)^*|#%;dCl?>y?{Jlykv4)xll{# zHsbRRQ*5}M){06crJ;uo|MIrTDS%KL%r#!c9`w@k`6R@?#3&6I`mNy+-xu`Kgj~H# z{a!7H@AdiVkoc&qMO$h?Jxa~_OzHQ`i6prQi(GvTI$75_L=s%h02ryM8Afdo=5~Z8 zfZ9!5j~bQ!+lT-%+ey(FH=B^^M5d4qDPs2mg8Js&UDO7vqpMrpnIVF{l zf*MumW=>|!T_H3q4!GW8bWxeIo&xn-@y4`M(Ene(*uHo`Yumw4Dkb|6Ke_-1qv-Z_a|z}^9;}e!dk}l6q5Xua#{tQYF#rPt zGdPG3CpbHg^2~WQ7VQSZYUtM0&vd|VYrGunQa8>ktPELS*QpeT&ixC6j!t|0r!-HO znO;Z-774jp5i7@dKk}PZSD~D}=;*(MUgn_4Q4*d#p+(tTYv-Ulg08 z$(`NJ{tLEr@q^Lu)}?)O3FKlxR3f$|l=lr`cN`s93|rg4<|M=%GQo))1V$)V)J_xp z2X(0+ST-(*6crc4Re5N5n0Kx!4k%`u;<2rL6h_NV=#OSkUho?n)a!{vpz%BBKr>N* zw;iQWlMdLjcuCeF6J1E!008LgbTOj zT44dn+UiU|c??^NDwU@`q%j#1S;6KXeQlec1UK=t2}u6BiO#_egjb)?($DZz3vpOv z&kvB~39NRC`%uFaiWsPonWY3gSN!V6z`S7EzwD`F{bZEPD#5Ka2=f^_PErzJs|bNq zO$uv2Fj7LG<^^-h;Y@{)MT9=P%Hud}_Z{SfXh?x8v+~x)aA=JS<{T(l;GxHn2p+>! 
zC8L;-y_sPdiQ4fu7Vvmme=jlg|K6cUTp>cEk|WOzb5ZrVKYJmaMyXnyqR^yj*hQkG z7C7Iq=BT!%X1n5zO88apw1MZ1IB<658)R9~VVGAy*(8K~nav+W%NF{HhsNAa~;KyEg-^NUR_K8aS0*+s3_}TZ`?4=)|tCkQoEnVWsa7~=l zKZwO;#}oF}Dp1y`CDSd6Ay-VFmlgRTf)=?1gzKyr2YP)QuxrOxPKglDK^zBgG;g>1{g@} zM%8$CqB2T@?B4R{fkWT6ljwYxED~_iftlU!D zTe$lo>fQp(@z7e8Xru=JG9d6MTUb0wka|VNhKl zcj1KNts)7Ev6y8ooAMKBO8XJ|J(FZS*6UiKPFP!a3ytlyfxIMGWV?kl7bw2yD2W@r zl?*C^>H2B)LBs!qNbJiK`?Fx(@QQ1FpM3_th}6!&&o`L? zPwW+9Kw_^wwgdRP|+ zIIwcN!b0kgp6O!9cN-i!H_hKzrt7~_aF3K09fGJBrFueMkT7A1F!Jm`C*~q%jUAZ# zfJ}=C90YjcNQVTvW6!lo^!mJglIyivbW|-){|YG6Ls+gnC8Vm3$KDq(r_|9tiBk-A zw|0THE~D>;U%BlCrqZ)t=JZZzfTaiu-!yuu|MEr%ix1xXS!ilXd|8%;b)ogz^*tpe zg_bvBH!7*+U#)WUcOfZMR3W}zNBUOiuB$xUsVkVm>rz^y5%UQLL`X$<1*g{W#+^dS3(FDTs%danUfwKJsnkS?$9quA`d_iuQR~@ZaA!hdK^q zX{E-GdwUL}0ktc_)mPCvZ(7AhiaFVU7}>4&=SR0OT21acfnM0X{)t{0F~`gF`CD|r zi}+Qk5g0(vmi!i5mPPwzx#119`F_pZ&Nhf!j5C{YM)hey&AY7CFz!;lIw~*}Az)2R z_@TR)E+yKE0xTyWIRr<*_0rt5BYR(67wX%TeLPo!=;f9ZZ{LY1RuiPC#OO9Oe)5yi zwi~` zYFwlHqqCZ?U5s%Yt61pw0aDTquoy;$&7TRVzUX_p3)MESOyyd>5GxN6e=GIS{p*3B z#Bt~E#nt77yCQ**1;%IuL~j;s&9{_mGI1mLt#%ip?FzX5TKh*{1x=|)|@d54t z>5|`tu+NI@)h;(>LsVDfhq?nwwxYiyT5|%Av8+9?x%y_WEyX(+D8LbGew#7AYo57!jHcR{GDT^acqYi@Rrh6Wol872ZFTAE)xM8h7oA--$4*;^wo>bX}VWFuw(Ur-!Q@f4~dU2|zN z{C%duIj=xO0OtFLQxqVgs09eTA2uk|eh4a9vSz}zkQ_;IEx%dK0Q9Y0$zF$f?6GgE zuIsJA#mS|@<5)yqkM~oWkUII-EAw-3JL`Li#Afd}rvHt6$_mhfJA=+u0_EMi<|I>4J)E zZ^#(_nDIfw24Fqbx8V3*>f>=?Mj#D2YV1BSwcA>yTkvE9@W+nCwck$jod>)&+O5-{ zxcVklARozBBM{+5+U?NFeaU$lr<0US`v*yw*$^=0L-q>1caCR-A;F>Zn(>`*gH-6p z-;OE2fWiPVRtS_;s8Vu59612|o~gFpR%O7l3$edTR`oF$@*lD9r6nujeVr}R^g)G1 zM`2fu>4CSxq-KQVhgWxV0ge=GEn%}G+LrAzw&p$4&fZP6d;{9w9rCq`2s)(V#5DhuqC++-nDSlUSVpHZ|vVX=gMpCcTEIh|=GJ%M?9md)NgGsQr{`eZRPM1j@eO7rA3+u^W6 zSi?s9x^FkBqD-~bWQx{&${ytVc;iD==hW`4w9y5si=&z`TvqAjkKSfv1&q6M~+Qw87V7tdv~%v$QWGBqpKBr!Mp&6k851U$9->)Y%cH`SMrm)E(z(`@vT}~395_3 zT74h%?6NKsM4#Jy_LyL6C$U1b&5}B^{+nU+{1^UOvk`^!h4_H4^(P?A~|JRtXsMM=GScHJ`_xUP2Do&{- zDkw_p>riH|`-42zKib`G>Gn5t=D_EWae-Myd@Qak9v0)RtE1DiCn{qBOp-4td3mPY 
z!WT45MkLMrTzG1edetmd>E?zyNRN1kz5GF+8Z5Si<7b(@s%#Q1hQu>nxhd zQQs%CG@+pC9Ipg1E{8a7f%QA4SN2Dm?KBIeQW|BFLnAVpkg$zFNGALk_G4`lCBO_F z>3XX!2DGo}2A4AS7>s8!Jl;Bh|5)`FjRt*xG>NCdK<1O|Kv<-Nz|zwKpfpFIo1N|G z5%gPzF{yS6%2D?mp5lP;%PS1Z^&7NXTka5et3jwiI=lJ!h;?eSG&w=|D}Cy}j;mN9 z;?e{A()9a)+_^1glna>fzTcx0*|gklY??I!Y3BFZ=oR|PnhV)+cgqAg;*h(mYGnab z!z_@7vs>MFK}H-LW~({4?eY!R7wv;+h%yFhSiAF^M>%8!BI7d1NiSb5Jp|b-=a3E3 zV0vBdr5|JiT<$DjiD(wPH^c&}0gE4$?=D+Coy#4fVeL$yhHEj^ORS-O$^`o9cFEVE zhmg&B3rvL-l<&^U?k`b?_{q!{=qJ(w;lY2#_kRU^&I-G1U;qM7S3j3^P6 $(OBJ_DIR)/dpd_cubin.h +$(OBJ_DIR)/ufm.cubin: lal_ufm.cu lal_precision.h lal_preprocessor.h + $(CUDA) --cubin -DNV_KERNEL -o $@ lal_ufm.cu + +$(OBJ_DIR)/ufm_cubin.h: $(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm.cubin + $(BIN2C) -c -n ufm $(OBJ_DIR)/ufm.cubin > $(OBJ_DIR)/ufm_cubin.h + +$(OBJ_DIR)/lal_ufm.o: $(ALL_H) lal_ufm.h lal_ufm.cpp $(OBJ_DIR)/ufm_cubin.h $(OBJ_DIR)/lal_base_atomic.o + $(CUDR) -o $@ -c lal_ufm.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_ufm_ext.o: $(ALL_H) lal_ufm.h lal_ufm_ext.cpp lal_base_atomic.h + $(CUDR) -o $@ -c lal_ufm_ext.cpp -I$(OBJ_DIR) + $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o $(CUDR) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR) diff --git a/lib/gpu/README b/lib/gpu/README index b26897e885..15b65516ac 100644 --- a/lib/gpu/README +++ b/lib/gpu/README @@ -135,6 +135,7 @@ Current styles supporting GPU acceleration: 38 yukawa/colloid 39 yukawa 40 pppm + 41 ufm MULTIPLE LAMMPS PROCESSES diff --git a/lib/gpu/lal_ufm.cpp b/lib/gpu/lal_ufm.cpp new file mode 100644 index 0000000000..c7aa2cca39 --- /dev/null +++ b/lib/gpu/lal_ufm.cpp @@ -0,0 +1,172 @@ +/*************************************************************************** + ufm.cpp + ------------------- + Rodolfo Paula Leite (Unicamp/Brazil) + Maurice de Koning (Unicamp/Brazil) + + Class for acceleration of the ufm pair style. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : pl.rodolfo@gmail.com + dekoning@ifi.unicamp.br + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "ufm_cl.h" +#elif defined(USE_CUDART) +const char *ufm=0; +#else +#include "ufm_cubin.h" +#endif + +#include "lal_ufm.h" +#include +using namespace LAMMPS_AL; +#define UFMT UFM + +extern Device device; + +template +UFMT::UFM() : BaseAtomic(), _allocated(false) { +} + +template +UFMT::~UFM() { + clear(); +} + +template +int UFMT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int UFMT::init(const int ntypes, + double **host_cutsq, double **host_uf1, + double **host_uf2, double **host_uf3, + double **host_uf4, double **host_offset, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,ufm,"k_ufm"); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + int max_shared_types=this->device->max_shared_types(); + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,uf1,host_write,host_uf1,host_uf2, + host_cutsq); + + 
uf3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,uf3,host_write,host_uf3,host_uf4, + host_offset); + + UCL_H_Vec dview; + sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(host_special_lj,4,*(this->ucl_device)); + ucl_copy(sp_lj,dview,false); + + _allocated=true; + this->_max_bytes=uf1.row_bytes()+uf3.row_bytes()+sp_lj.row_bytes(); + return 0; +} + +template +void UFMT::reinit(const int ntypes, double **host_cutsq, double **host_uf1, + double **host_uf2, double **host_uf3, + double **host_uf4, double **host_offset) { + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(_lj_types*_lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; i<_lj_types*_lj_types; i++) + host_write[i]=0.0; + + this->atom->type_pack4(ntypes,_lj_types,uf1,host_write,host_uf1,host_uf2, + host_cutsq); + this->atom->type_pack4(ntypes,_lj_types,uf3,host_write,host_uf3,host_uf4, + host_offset); +} + +template +void UFMT::clear() { + if (!_allocated) + return; + _allocated=false; + + uf1.clear(); + uf3.clear(); + sp_lj.clear(); + this->clear_atomic(); +} + +template +double UFMT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(UFM); +} + +// --------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +void UFMT::loop(const bool _eflag, const bool _vflag) { + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int eflag, vflag; + if (_eflag) + eflag=1; + else + eflag=0; + + if (_vflag) + vflag=1; + else + vflag=0; + + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_fast.set_size(GX,BX); 
+ this->k_pair_fast.run(&this->atom->x, &uf1, &uf3, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, + &vflag, &ainum, &nbor_pitch, + &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &uf1, &uf3, &_lj_types, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, &vflag, + &ainum, &nbor_pitch, &this->_threads_per_atom); + } + this->time_pair.stop(); +} + +template class UFM; diff --git a/lib/gpu/lal_ufm.cu b/lib/gpu/lal_ufm.cu new file mode 100644 index 0000000000..51c4df3b5b --- /dev/null +++ b/lib/gpu/lal_ufm.cu @@ -0,0 +1,188 @@ +/*************************************************************************** + ufm.cu + ------------------- + Rodolfo Paula Leite (Unicamp/Brazil) + Maurice de Koning (Unicamp/Brazil) + + Device code for acceleration of the ufm pair style + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : pl.rodolfo@gmail.com + dekoning@ifi.unicamp.br + ***************************************************************************/ + +#ifdef NV_KERNEL +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +texture pos_tex; +#else +texture pos_tex; +#endif +#else +#define pos_tex x_ +#endif + +__kernel void k_ufm(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict uf1, + const __global numtyp4 *restrict uf3, + const int lj_types, + const __global numtyp *restrict sp_lj, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + acctyp 
energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + if (ii0) { + energy += - factor_lj * uf3[mtype].x*log(1.0 - expuf) - uf3[mtype].z; + } + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); + } // if ii +} + +__kernel void k_ufm_fast(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict uf1_in, + const __global numtyp4 *restrict uf3_in, + const __global numtyp *restrict sp_lj_in, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + __local numtyp4 uf1[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp4 uf3[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp sp_lj[4]; + if (tid<4) + sp_lj[tid]=sp_lj_in[tid]; + if (tid0) + uf3[tid]=uf3_in[tid]; + } + + acctyp energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + __syncthreads(); + + if (ii0) { + energy += - factor_lj * uf3[mtype].x * log(1.0 - expuf) - uf3[mtype].z; + } + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); + } // if ii +} + diff --git a/lib/gpu/lal_ufm.h b/lib/gpu/lal_ufm.h new file mode 100644 index 
0000000000..aeeaacbe99 --- /dev/null +++ b/lib/gpu/lal_ufm.h @@ -0,0 +1,86 @@ +/*************************************************************************** + ufm.h + ------------------- + Rodolfo Paula Leite (Unicamp/Brazil) + Maurice de Koning (Unicamp/Brazil) + + Class for acceleration of the ufm pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : pl.rodolfo@gmail.com + dekoning@ifi.unicamp.br + ***************************************************************************/ + +#ifndef LAL_UFM_H +#define LAL_UFM_H + +#include "lal_base_atomic.h" + +namespace LAMMPS_AL { + +template +class UFM : public BaseAtomic { + public: + UFM(); + ~UFM(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double **host_uf1, double **host_uf2, double **host_uf3, + double **host_uf4, double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen); + + /// Send updated coeffs from host to device (to be compatible with fix adapt) + void reinit(const int ntypes, double **host_cutsq, + double **host_uf1, double **host_uf2, double **host_uf3, + double **host_uf4, double **host_offset); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// 
Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + // --------------------------- TYPE DATA -------------------------- + + /// uf1.x = uf1, uf1.y = uf2, uf1.z = cutsq + UCL_D_Vec uf1; + /// uf3.x = uf3, uf3.y = uf4, uf3.z = offset + UCL_D_Vec uf3; + /// Special LJ values + UCL_D_Vec sp_lj; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + private: + bool _allocated; + void loop(const bool _eflag, const bool _vflag); +}; + +} + +#endif diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh index f4aeaa2706..88f47a3dc4 100644 --- a/src/GPU/Install.sh +++ b/src/GPU/Install.sh @@ -131,6 +131,8 @@ action pair_zbl_gpu.cpp action pair_zbl_gpu.h action pppm_gpu.cpp pppm.cpp action pppm_gpu.h pppm.cpp +action pair_ufm_gpu.cpp +action pair_ufm_gpu.h # edit 2 Makefile.package files to include/exclude package info diff --git a/src/GPU/pair_ufm_gpu.cpp b/src/GPU/pair_ufm_gpu.cpp new file mode 100644 index 0000000000..96af0dc069 --- /dev/null +++ b/src/GPU/pair_ufm_gpu.cpp @@ -0,0 +1,240 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+ ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: + Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com + Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br + ------------------------------------------------------------------------- */ + +#include +#include +#include +#include "pair_ufm_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include +#include "gpu_extra.h" + +using namespace LAMMPS_NS; + +// External functions from cuda library for atom decomposition + +int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1, + double **host_uf2, double **host_uf3, double **host_uf4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); + +int ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1, + double **host_uf2, double **host_uf3, double **host_uf4, + double **offset); + +void ufml_gpu_clear(); +int ** ufml_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, + const double cpu_time, bool &success); +void ufml_gpu_compute(const int ago, const int inum, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); +double 
ufml_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairUFMGPU::PairUFMGPU(LAMMPS *lmp) : PairUFM(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  cpu_time = 0.0;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairUFMGPU::~PairUFMGPU()
+{
+  ufml_gpu_clear();
+}
+
+/* ----------------------------------------------------------------------
+   hand the force computation to the GPU library; atoms with index
+   host_start..inum-1 are left to the host and done by cpu_compute()
+------------------------------------------------------------------------- */
+
+void PairUFMGPU::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {
+    // neighbor list is produced by the library and returned to the host
+    inum = atom->nlocal;
+    firstneigh = ufml_gpu_compute_n(neighbor->ago, inum, nall,
+                                    atom->x, atom->type, domain->sublo,
+                                    domain->subhi, atom->tag, atom->nspecial,
+                                    atom->special, eflag, vflag, eflag_atom,
+                                    vflag_atom, host_start,
+                                    &ilist, &numneigh, cpu_time, success);
+  } else {
+    // host-built full neighbor list is passed to the library
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    ufml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
+                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
+                     vflag_atom, host_start, cpu_time, success);
+  }
+  if (!success)
+    error->one(FLERR,"Insufficient memory on accelerator");
+
+  if (host_start<inum) {
+    cpu_time = MPI_Wtime();
+    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
+    cpu_time = MPI_Wtime() - cpu_time;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairUFMGPU::init_style()
+{
+  if (force->newton_pair)
+    error->all(FLERR,"Cannot use newton pair with ufm/gpu pair style");
+
+  // Repeat cutsq calculation because done after call to init_style
+  // (local is named rc so it does not shadow the inherited cut array)
+  double maxcut = -1.0;
+  double rc;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        rc = init_one(i,j);
+        rc *= rc;
+        if (rc > maxcut)
+          maxcut = rc;
+        cutsq[i][j] = cutsq[j][i] = rc;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  int maxspecial=0;
+  if (atom->molecular)
+    maxspecial=atom->maxspecial;
+  int success = ufml_gpu_init(atom->ntypes+1, cutsq, uf1, uf2, uf3, uf4,
+                              offset, force->special_lj, atom->nlocal,
+                              atom->nlocal+atom->nghost, 300, maxspecial,
+                              cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success,error,world);
+
+  // only GPU_FORCE mode needs a host-built (full) neighbor list
+  if (gpu_mode == GPU_FORCE) {
+    int irequest = neighbor->request(this,instance_me);
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full = 1;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairUFMGPU::reinit()
+{
+  Pair::reinit();
+
+  ufml_gpu_reinit(atom->ntypes+1, cutsq, uf1, uf2, uf3, uf4, offset);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairUFMGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + ufml_gpu_bytes();
+}
+
+/* ----------------------------------------------------------------------
+   host-side force loop for atoms ilist[start..inum-1]
+   only the force on atom i is updated; tallies go through ev_tally_full()
+------------------------------------------------------------------------- */
+
+void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int vflag,
+                             int *ilist, int *numneigh, int **firstneigh) {
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,expuf,factor_lj;
+  int *jlist;
+
+  // evdwl is passed to ev_tally_full() even when eflag is 0
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int *type = atom->type;
+  double *special_lj = force->special_lj;
+
+  // loop over neighbors of my atoms
+
+  for (ii = start; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        expuf = exp(- rsq * uf2[itype][jtype]);
+        fpair = factor_lj * uf1[itype][jtype] * expuf /(1.0 - expuf);
+
+        f[i][0] += delx*fpair;
+        f[i][1] += dely*fpair;
+        f[i][2] += delz*fpair;
+
+        if (eflag) {
+          evdwl = -factor_lj * uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
+        }
+
+        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+      }
+    }
+  }
+}
diff --git a/src/GPU/pair_ufm_gpu.h b/src/GPU/pair_ufm_gpu.h
new file mode 100644
index 0000000000..59b883f3aa
--- /dev/null
+++ b/src/GPU/pair_ufm_gpu.h
@@ -0,0 +1,65 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(ufm/gpu,PairUFMGPU)
+
+#else
+
+#ifndef LMP_PAIR_UFM_GPU_H
+#define LMP_PAIR_UFM_GPU_H
+
+#include "pair_ufm.h"
+
+namespace LAMMPS_NS {
+
+class PairUFMGPU : public PairUFM {
+ public:
+  PairUFMGPU(LAMMPS *lmp);
+  ~PairUFMGPU();
+  void cpu_compute(int, int, int, int, int *, int *, int **);  // (start, inum, eflag, vflag, ilist, numneigh, firstneigh): host-side pair loop
+  void compute(int, int);
+  void init_style();
+  void reinit();
+  double memory_usage();  // Pair::memory_usage() plus ufml_gpu_bytes()
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };  // init_style() requests a host full neighbor list only for GPU_FORCE
+
+ private:
+  int gpu_mode;     // starts as GPU_FORCE; passed by reference to ufml_gpu_init()
+  double cpu_time;  // zeroed in the constructor; handed to the ufml_gpu_compute*() calls
+};
+
+}
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Insufficient memory on accelerator
+
+There is insufficient memory on one of the devices specified for the gpu
+package
+
+E: Cannot use newton pair with ufm/gpu pair style
+
+Self-explanatory.
+
+*/
diff --git a/src/OPT/Install.sh b/src/OPT/Install.sh
index ca1231c615..c6ae2b914b 100644
--- a/src/OPT/Install.sh
+++ b/src/OPT/Install.sh
@@ -46,3 +46,5 @@ action pair_lj_long_coul_long_opt.cpp pair_lj_long_coul_long.cpp
 action pair_lj_long_coul_long_opt.h pair_lj_long_coul_long.cpp
 action pair_morse_opt.cpp
 action pair_morse_opt.h
+action pair_ufm_opt.cpp
+action pair_ufm_opt.h
diff --git a/src/OPT/pair_ufm_opt.cpp b/src/OPT/pair_ufm_opt.cpp
new file mode 100644
index 0000000000..1cf504674d
--- /dev/null
+++ b/src/OPT/pair_ufm_opt.cpp
@@ -0,0 +1,197 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.
Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#include <stdlib.h>
+#include "pair_ufm_opt.h"
+#include "atom.h"
+#include "force.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairUFMOpt::PairUFMOpt(LAMMPS *lmp) : PairUFM(lmp) {}
+
+/* ----------------------------------------------------------------------
+   pick the eval<> instantiation matching evflag, eflag and newton_pair
+------------------------------------------------------------------------- */
+
+void PairUFMOpt::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  if (evflag) {
+    if (eflag) {
+      if (force->newton_pair) return eval<1,1,1>();
+      else return eval<1,1,0>();
+    } else {
+      if (force->newton_pair) return eval<1,0,1>();
+      else return eval<1,0,0>();
+    }
+  } else {
+    if (force->newton_pair) return eval<0,0,1>();
+    else return eval<0,0,0>();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   force/energy kernel; per-type-pair coefficients are first packed into
+   one contiguous table indexed by (itype-1)*ntypes + (jtype-1)
+------------------------------------------------------------------------- */
+
+template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
+void PairUFMOpt::eval()
+{
+  typedef struct { double x,y,z; } vec3_t;
+
+  typedef struct {
+    double cutsq,uf1,uf2,uf3,uf4,scale,offset;
+    double _pad[2];
+  } fast_alpha_t;
+
+  int i,j,ii,jj,inum,jnum,itype,jtype,sbindex;
+  double factor_lj;
+  double evdwl = 0.0;
+
+  double** _noalias x = atom->x;
+  double** _noalias f = atom->f;
+  int* _noalias type = atom->type;
+  int nlocal = atom->nlocal;
+  double* _noalias special_lj = force->special_lj;
+
+  inum = list->inum;
+  int* _noalias ilist = list->ilist;
+  int** _noalias firstneigh = list->firstneigh;
+  int* _noalias numneigh = list->numneigh;
+
+  vec3_t* _noalias xx = (vec3_t*)x[0];
+  vec3_t* _noalias ff = (vec3_t*)f[0];
+
+  int ntypes = atom->ntypes;
+  int ntypes2 = ntypes*ntypes;
+
+  fast_alpha_t* _noalias fast_alpha =
+    (fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t));
+  for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
+    fast_alpha_t& a = fast_alpha[i*ntypes+j];
+    a.cutsq = cutsq[i+1][j+1];
+    a.uf1 = uf1[i+1][j+1];
+    a.uf2 = uf2[i+1][j+1];
+    a.uf3 = uf3[i+1][j+1];
+    a.uf4 = uf4[i+1][j+1];
+    a.scale = scale[i+1][j+1];
+    a.offset = offset[i+1][j+1];
+  }
+  fast_alpha_t* _noalias tabsix = fast_alpha;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    double xtmp = xx[i].x;
+    double ytmp = xx[i].y;
+    double ztmp = xx[i].z;
+    itype = type[i] - 1;
+    int* _noalias jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    double tmpfx = 0.0;
+    double tmpfy = 0.0;
+    double tmpfz = 0.0;
+
+    fast_alpha_t* _noalias tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      sbindex = sbmask(j);
+
+      if (sbindex == 0) {
+        // no special-bond bits set: j is a plain index, no scaling needed
+        double delx = xtmp - xx[j].x;
+        double dely = ytmp - xx[j].y;
+        double delz = ztmp - xx[j].z;
+        double rsq = delx*delx + dely*dely + delz*delz;
+
+        jtype = type[j] - 1;
+
+        fast_alpha_t& a = tabsixi[jtype];
+
+        if (rsq < a.cutsq) {
+          double expuf = exp(- rsq * a.uf2);
+          double fpair = a.scale * a.uf1 * expuf / (1.0 - expuf);
+
+          tmpfx += delx*fpair;
+          tmpfy += dely*fpair;
+          tmpfz += delz*fpair;
+          if (NEWTON_PAIR || j < nlocal) {
+            ff[j].x -= delx*fpair;
+            ff[j].y -= dely*fpair;
+            ff[j].z -= delz*fpair;
+          }
+
+          if (EFLAG) evdwl = - a.uf3 * log(1.0 - expuf) - a.offset;
+
+          if (EVFLAG)
+            ev_tally(i,j,nlocal,NEWTON_PAIR,
+                     evdwl,0.0,fpair,delx,dely,delz);
+        }
+
+      } else {
+        // special pair: strip the mask bits and weight by special_lj
+        factor_lj = special_lj[sbindex];
+        j &= NEIGHMASK;
+
+        double delx = xtmp - xx[j].x;
+        double dely = ytmp - xx[j].y;
+        double delz = ztmp - xx[j].z;
+        double rsq = delx*delx + dely*dely + delz*delz;
+
+        jtype = type[j] - 1;
+
+        fast_alpha_t& a = tabsixi[jtype];
+        if (rsq < a.cutsq) {
+          double expuf = exp(- rsq * a.uf2);
+          double fpair = a.scale * factor_lj * a.uf1 * expuf / (1.0 - expuf);
+
+          tmpfx += delx*fpair;
+          tmpfy += dely*fpair;
+          tmpfz += delz*fpair;
+          if (NEWTON_PAIR || j < nlocal) {
+            ff[j].x -= delx*fpair;
+            ff[j].y -= dely*fpair;
+            ff[j].z -= delz*fpair;
+          }
+
+          if (EFLAG) {
+            evdwl = - a.uf3 * log(1.0 - expuf) - a.offset;
+            evdwl *= factor_lj;
+          }
+
+          if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
+                               evdwl,0.0,fpair,delx,dely,delz);
+        }
+      }
+    }
+
+    ff[i].x += tmpfx;
+    ff[i].y += tmpfy;
+    ff[i].z += tmpfz;
+  }
+
+  free(fast_alpha); fast_alpha = 0;
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
diff --git a/src/OPT/pair_ufm_opt.h b/src/OPT/pair_ufm_opt.h
new file mode 100644
index 0000000000..edac708403
--- /dev/null
+++ b/src/OPT/pair_ufm_opt.h
@@ -0,0 +1,45 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(ufm/opt,PairUFMOpt)
+
+#else
+
+#ifndef LMP_PAIR_UFM_OPT_H
+#define LMP_PAIR_UFM_OPT_H
+
+#include "pair_ufm.h"
+
+namespace LAMMPS_NS {
+
+class PairUFMOpt : public PairUFM {
+ public:
+  PairUFMOpt(class LAMMPS *);
+  void compute(int, int);  // (eflag, vflag): dispatches to one eval<> instantiation
+
+ private:
+  template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval();  // pair loop with the three flags as compile-time constants
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_ufm_omp.cpp b/src/USER-OMP/pair_ufm_omp.cpp
new file mode 100644
index 0000000000..b2e2cd29ee
--- /dev/null
+++ b/src/USER-OMP/pair_ufm_omp.cpp
@@ -0,0 +1,159 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#include <math.h>
+#include "pair_ufm_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairUFMOMP::PairUFMOMP(LAMMPS *lmp) :
+  PairUFM(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ----------------------------------------------------------------------
+   each thread runs eval<> on its own [ifrom,ito) slice of the i-loop,
+   then reduce_thr() is called for that thread
+------------------------------------------------------------------------- */
+
+void PairUFMOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+        else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+        else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    thr->timer(Timer::PAIR);
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   pair loop over ilist[iifrom..iito-1]; forces are accumulated into the
+   array returned by thr->get_f()
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairUFMOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,expuf,factor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
+  const int * _noalias const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * _noalias const special_lj = force->special_lj;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i].x;
+    ytmp = x[i].y;
+    ztmp = x[i].z;
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j].x;
+      dely = ytmp - x[j].y;
+      delz = ztmp - x[j].z;
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        expuf = exp(- rsq * uf2[itype][jtype]);
+        fpair = factor * scale[itype][jtype] * uf1[itype][jtype] * expuf /(1.0 - expuf);
+
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx*fpair;
+          f[j].y -= dely*fpair;
+          f[j].z -= delz*fpair;
+        }
+
+        if (EFLAG) {
+          evdwl = -uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
+          evdwl *= factor;
+        }
+
+        if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+                                 evdwl,0.0,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairUFMOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairUFM::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_ufm_omp.h
b/src/USER-OMP/pair_ufm_omp.h
new file mode 100644
index 0000000000..2a01da15d0
--- /dev/null
+++ b/src/USER-OMP/pair_ufm_omp.h
@@ -0,0 +1,50 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(ufm/omp,PairUFMOMP)
+
+#else
+
+#ifndef LMP_PAIR_UFM_OMP_H
+#define LMP_PAIR_UFM_OMP_H
+
+#include "pair_ufm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairUFMOMP : public PairUFM, public ThrOMP {
+
+ public:
+  PairUFMOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  // flags are template parameters so each combination gets its own kernel
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/pair_ufm.cpp b/src/pair_ufm.cpp
new file mode 100644
index 0000000000..5307ced365
--- /dev/null
+++ b/src/pair_ufm.cpp
@@ -0,0 +1,376 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S.
Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+ ------------------------------------------------------------------------- */
+
+/* -----------------------------------------------------------------------
+   Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_ufm.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "update.h"
+#include "integrate.h"
+#include "respa.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+/* ---------------------------------------------------------------------- */
+
+PairUFM::PairUFM(LAMMPS *lmp) : Pair(lmp)
+{
+  writedata = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairUFM::~PairUFM()
+{
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+    memory->destroy(cut);
+    memory->destroy(epsilon);
+    memory->destroy(sigma);
+    memory->destroy(scale);
+    memory->destroy(uf1);
+    memory->destroy(uf2);
+    memory->destroy(uf3);
+    memory->destroy(uf4);
+    memory->destroy(offset);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pair loop: with x = exp(-rsq*uf2),
+   fpair = factor*scale*uf1*x/(1-x) and evdwl = factor*(-uf3*log(1-x) - offset)
+------------------------------------------------------------------------- */
+
+void PairUFM::compute(int eflag, int vflag)
+{
+  int i,j,ii,jj,inum,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq, expuf, factor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_lj = force->special_lj;
+  int newton_pair = force->newton_pair;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        expuf = exp(- rsq * uf2[itype][jtype]);
+        fpair = factor * scale[itype][jtype] * uf1[itype][jtype] * expuf /(1.0 - expuf);
+
+        f[i][0] += delx*fpair;
+        f[i][1] += dely*fpair;
+        f[i][2] += delz*fpair;
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= delx*fpair;
+          f[j][1] -= dely*fpair;
+          f[j][2] -= delz*fpair;
+        }
+
+        if (eflag) {
+          evdwl = -uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
+          evdwl *= factor;
+        }
+
+        if (evflag) ev_tally(i,j,nlocal,newton_pair,
+                             evdwl,0.0,fpair,delx,dely,delz);
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairUFM::allocate()
+{
+  allocated = 1;
+  int n = atom->ntypes;
+
+  memory->create(setflag,n+1,n+1,"pair:setflag");
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++)
+      setflag[i][j] = 0;
+
+  memory->create(cutsq,n+1,n+1,"pair:cutsq");
+
+  memory->create(cut,n+1,n+1,"pair:cut");
+  memory->create(epsilon,n+1,n+1,"pair:epsilon");
+  memory->create(sigma,n+1,n+1,"pair:sigma");
+  memory->create(scale,n+1,n+1,"pair:scale");
+  memory->create(uf1,n+1,n+1,"pair:uf1");
+  memory->create(uf2,n+1,n+1,"pair:uf2");
+  memory->create(uf3,n+1,n+1,"pair:uf3");
+  memory->create(uf4,n+1,n+1,"pair:uf4");
+  memory->create(offset,n+1,n+1,"pair:offset");
+
+  // coeff() assigns scale only for explicitly given pairs; default every
+  // entry to 1.0 so mixed pairs and pairs read from a restart file are
+  // not multiplied by an unset value in compute()
+
+  for (int i = 1; i <= n; i++)
+    for (int j = 1; j <= n; j++)
+      scale[i][j] = 1.0;
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairUFM::settings(int narg, char **arg)
+{
+  if (narg != 1) error->all(FLERR,"Illegal pair_style command");
+
+  cut_global = force->numeric(FLERR,arg[0]);
+
+  // reset cutoffs that have been explicitly set
+  // (j starts at i so the diagonal i,i entries are reset as well)
+
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+        if (setflag[i][j]) cut[i][j] = cut_global;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairUFM::coeff(int narg, char **arg)
+{
+  if (narg < 4 || narg > 5)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi);
+
+  double epsilon_one = force->numeric(FLERR,arg[2]);
+  double sigma_one = force->numeric(FLERR,arg[3]);
+
+  double cut_one = cut_global;
+
+  if (narg == 5) cut_one = force->numeric(FLERR,arg[4]);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo,i); j <= jhi; j++) {
+      epsilon[i][j] = epsilon_one;
+      sigma[i][j] = sigma_one;
+      scale[i][j] = 1.0;
+      cut[i][j] = cut_one;
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairUFM::init_one(int i, int j)
+{
+  if (setflag[i][j] == 0) {
+    epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
+                               sigma[i][i],sigma[j][j]);
+    sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
+    cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
+  }
+
+  uf1[i][j] = 2.0 * epsilon[i][j] / pow(sigma[i][j],2.0);
+  uf2[i][j] = 1.0 / pow(sigma[i][j],2.0);
+  uf3[i][j] = epsilon[i][j];
+  uf4[i][j] = sigma[i][j];
+
+  if (offset_flag) {
+    double ratio = pow(cut[i][j] / sigma[i][j],2.0);
+    offset[i][j] = - epsilon[i][j] * log ( 1.0 - exp( -ratio )) ;
+  } else offset[i][j] = 0.0;
+
+  uf1[j][i] = uf1[i][j];
+  uf2[j][i] = uf2[i][j];
+  uf3[j][i] = uf3[i][j];
+  uf4[j][i] = uf4[i][j];
+  scale[j][i] = scale[i][j];
+  offset[j][i] = offset[i][j];
+
+  return cut[i][j];
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairUFM::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  int i,j;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      fwrite(&setflag[i][j],sizeof(int),1,fp);
+      if (setflag[i][j]) {
+        fwrite(&epsilon[i][j],sizeof(double),1,fp);
+        fwrite(&sigma[i][j],sizeof(double),1,fp);
+        fwrite(&cut[i][j],sizeof(double),1,fp);
+      }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairUFM::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+  allocate();
+
+  int i,j;
+  int me = comm->me;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
+      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
+      if (setflag[i][j]) {
+        if (me == 0) {
+          fread(&epsilon[i][j],sizeof(double),1,fp);
+          fread(&sigma[i][j],sizeof(double),1,fp);
+          fread(&cut[i][j],sizeof(double),1,fp);
+        }
+        MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
+      }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairUFM::write_restart_settings(FILE *fp)
+{
+  fwrite(&cut_global,sizeof(double),1,fp);
+  fwrite(&offset_flag,sizeof(int),1,fp);
+  fwrite(&mix_flag,sizeof(int),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairUFM::read_restart_settings(FILE *fp)
+{
+  int me = comm->me;
+  if (me == 0) {
+    fread(&cut_global,sizeof(double),1,fp);
+    fread(&offset_flag,sizeof(int),1,fp);
+    fread(&mix_flag,sizeof(int),1,fp);
+  }
+  MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
+  MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to data file
+------------------------------------------------------------------------- */
+
+void PairUFM::write_data(FILE *fp)
+{
+  for (int i = 1; i <= atom->ntypes; i++)
+    fprintf(fp,"%d %g %g\n",i,epsilon[i][i],sigma[i][i]);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes all pairs to data file
+------------------------------------------------------------------------- */
+
+void PairUFM::write_data_all(FILE *fp)
+{
+  for (int i = 1; i <= atom->ntypes; i++)
+    for (int j = i; j <= atom->ntypes; j++)
+      fprintf(fp,"%d %d %g %g %g\n",i,j,epsilon[i][j],sigma[i][j],cut[i][j]);
+}
+
+/* ----------------------------------------------------------------------
+   energy and force for a single pair at squared distance rsq
+   fforce carries the same scale factor that compute() applies to fpair
+------------------------------------------------------------------------- */
+
+double PairUFM::single(int i, int j, int itype, int jtype, double rsq,
+                         double factor_coul, double factor_lj,
+                         double &fforce)
+{
+  double expuf,phiuf;
+  expuf = exp(- rsq * uf2[itype][jtype]);
+  fforce = factor_lj * scale[itype][jtype] * uf1[itype][jtype] * expuf /(1.0 - expuf);
+  phiuf = - uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
+  return factor_lj * phiuf;
+}
+
+/* ----------------------------------------------------------------------
+   expose the epsilon, sigma and scale tables by name (dim is set to 2)
+------------------------------------------------------------------------- */
+
+void *PairUFM::extract(const char *str, int &dim)
+{
+  dim = 2;
+  if (strcmp(str,"epsilon") == 0) return (void *) epsilon;
+  if (strcmp(str,"sigma") == 0) return (void *) sigma;
+  if (strcmp(str,"scale") == 0) return (void *) scale;
+  return NULL;
+}
diff --git a/src/pair_ufm.h b/src/pair_ufm.h
new file mode 100644
index 0000000000..2161c2acaf
--- /dev/null
+++ b/src/pair_ufm.h
@@ -0,0 +1,76 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(ufm,PairUFM)
+
+#else
+
+#ifndef LMP_PAIR_UFM_H
+#define LMP_PAIR_UFM_H
+
+#include "pair.h"
+
+namespace LAMMPS_NS {
+
+class PairUFM : public Pair {
+ public:
+  PairUFM(class LAMMPS *);
+  virtual ~PairUFM();
+  virtual void compute(int, int);
+  void settings(int, char **);  // takes exactly one argument: the global cutoff
+  void coeff(int, char **);     // type ranges I and J, epsilon, sigma, optional per-pair cutoff
+  double init_one(int, int);    // fills uf1..uf4 and offset for the pair; returns cut[i][j]
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  void write_restart_settings(FILE *);
+  void read_restart_settings(FILE *);
+  void write_data(FILE *);
+  void write_data_all(FILE *);
+  double single(int, int, int, int, double, double, double, double &);
+  void *extract(const char *, int &);  // recognises "epsilon", "sigma" and "scale"
+
+ protected:
+  double cut_global;                        // cutoff given to the pair_style command
+  double **cut,**scale;                     // per-pair cutoff; per-pair force multiplier (coeff() sets it to 1.0)
+  double **epsilon,**sigma;                 // per-pair coefficients from coeff() or mixing
+  double **uf1,**uf2,**uf3,**uf4,**offset;  // derived in init_one(): 2*eps/sigma^2, 1/sigma^2, eps, sigma, energy shift
+
+  virtual void allocate();
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+E: Incorrect args for pair coefficients
+
+Self-explanatory.  Check the input script or data file.
+
+*/
From 941ee565a11b52012a36cd91150576b4f5fea6b8 Mon Sep 17 00:00:00 2001
From: Rodolfo Leite
Date: Tue, 24 Oct 2017 11:12:51 -0200
Subject: [PATCH 2/6] Added lal_ufm_ext.cpp in lib/gpu

---
 lib/gpu/lal_ufm_ext.cpp | 143 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)
 create mode 100644 lib/gpu/lal_ufm_ext.cpp

diff --git a/lib/gpu/lal_ufm_ext.cpp b/lib/gpu/lal_ufm_ext.cpp
new file mode 100644
index 0000000000..ae4a5fb8fc
--- /dev/null
+++ b/lib/gpu/lal_ufm_ext.cpp
@@ -0,0 +1,143 @@
+/***************************************************************************
+                                 ufm_ext.cpp
+                             ------------------------------
+                            Rodolfo Paula Leite (Unicamp/Brazil)
+                            Maurice de Koning (Unicamp/Brazil)
+
+  Functions for LAMMPS access to ufm acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : pl.rodolfo@gmail.com
+                           dekoning@ifi.unicamp.br
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <math.h>
+
+#include "lal_ufm.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static UFM<PRECISION,ACC_PRECISION> UFMLMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
+                  double **host_uf2, double **host_uf3, double **host_uf4,
+                  double **offset, double *special_lj, const int inum, const int nall,
+                  const int max_nbors, const int maxspecial, const double cell_size,
+                  int &gpu_mode, FILE *screen) {
+  UFMLMF.clear();
+  gpu_mode=UFMLMF.device->gpu_mode();
+  double gpu_split=UFMLMF.device->particle_split();
+  int first_gpu=UFMLMF.device->first_device();
+  int last_gpu=UFMLMF.device->last_device();
+  int world_me=UFMLMF.device->world_me();
+  int gpu_rank=UFMLMF.device->gpu_rank();
+  int procs_per_gpu=UFMLMF.device->procs_per_gpu();
+
+  UFMLMF.device->init_message(screen,"ufm",first_gpu,last_gpu);
+
+  bool message=false;
+  if (UFMLMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)  // process 0 initializes first, the others wait at the barrier
+    init_ok=UFMLMF.init(ntypes, cutsq, host_uf1, host_uf2, host_uf3,
+                        host_uf4, offset, special_lj, inum, nall, max_nbors,
+                        maxspecial, cell_size, gpu_split, screen);
+
+  UFMLMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {  // remaining processes initialize one gpu_rank at a time
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=UFMLMF.init(ntypes, cutsq, host_uf1, host_uf2, host_uf3, host_uf4,
+                          offset, special_lj, inum, nall, max_nbors, maxspecial,
+                          cell_size, gpu_split, screen);
+
+    UFMLMF.device->gpu_barrier();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    UFMLMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
+                     double **host_uf2, double **host_uf3, double **host_uf4,
+                     double **offset) {
+  int world_me=UFMLMF.device->world_me();
+  int gpu_rank=UFMLMF.device->gpu_rank();
+  int procs_per_gpu=UFMLMF.device->procs_per_gpu();
+
+  if (world_me==0)
+    UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, host_uf4, offset);
+  UFMLMF.device->world_barrier();
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, host_uf4, offset);
+    UFMLMF.device->gpu_barrier();
+  }
+}
+
+void ufml_gpu_clear() {
+  UFMLMF.clear();
+}
+
+int ** ufml_gpu_compute_n(const int ago, const int inum_full,
+                        const int nall, double **host_x, int *host_type,
+                        double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        int **ilist, int **jnum, const double cpu_time,
+                        bool &success) {
+  return UFMLMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                       subhi, tag, nspecial, special, eflag, vflag, eatom,
+                       vatom, host_start, ilist, jnum, cpu_time, success);
+}
+
+void ufml_gpu_compute(const int ago, const int inum_full, const int nall,
+                     double **host_x, int *host_type, int *ilist, int *numj,
+                     int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start,
+                     const double cpu_time, bool &success) {
+  UFMLMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,
+                firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success);
+}
+
+double ufml_gpu_bytes() {
+  return UFMLMF.host_memory_usage();
+}
+
+
From 11cddd8798e14b7a8cffb855833aeef00825ab2b Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer
Date: Sat, 6 Jan 2018 19:42:40 -0500
Subject: [PATCH 3/6] explicitly include math.h

---
 src/OPT/pair_ufm_opt.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/OPT/pair_ufm_opt.cpp b/src/OPT/pair_ufm_opt.cpp
index 1cf504674d..f6f4c4ce3e 100644
--- a/src/OPT/pair_ufm_opt.cpp
+++ b/src/OPT/pair_ufm_opt.cpp
@@ -18,6 +18,7 @@
  ------------------------------------------------------------------------- */
 
 #include <stdlib.h>
+#include <math.h>
 #include "pair_ufm_opt.h"
 #include "atom.h"
 #include "force.h"
From f77483e4374208eabb01218a746008fd8fbb9d85 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer
Date: Sat, 6 Jan 2018 19:44:07 -0500
Subject: [PATCH 4/6] adapt #include statements to current LAMMPS conventions

---
 src/pair_ufm.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/pair_ufm.cpp b/src/pair_ufm.cpp
index 5307ced365..6462c0e797 100644
--- a/src/pair_ufm.cpp
+++ b/src/pair_ufm.cpp
@@ -17,10 +17,10 @@
             Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
  ------------------------------------------------------------------------- */
 
-#include "math.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "string.h"
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include "pair_ufm.h"
 #include "atom.h"
#include "comm.h" From 2ff278defac0b6ad4cad1302933f374478017b5d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 2 Feb 2018 10:02:44 +0100 Subject: [PATCH 5/6] fully integrate ufm into documentation --- doc/src/Section_commands.txt | 1 + doc/src/lammps.book | 1 + doc/src/pairs.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt index e816c8831b..d74d8c897d 100644 --- a/doc/src/Section_commands.txt +++ b/doc/src/Section_commands.txt @@ -1021,6 +1021,7 @@ KOKKOS, o = USER-OMP, t = OPT. "tip4p/cut (o)"_pair_coul.html, "tip4p/long (o)"_pair_coul.html, "tri/lj"_pair_tri_lj.html, +"ufm (got)"_pair_ufm.html, "vashishta (ko)"_pair_vashishta.html, "vashishta/table (o)"_pair_vashishta.html, "yukawa (go)"_pair_yukawa.html, diff --git a/doc/src/lammps.book b/doc/src/lammps.book index 0691f43e9b..83f060977b 100644 --- a/doc/src/lammps.book +++ b/doc/src/lammps.book @@ -511,6 +511,7 @@ pair_tersoff_mod.html pair_tersoff_zbl.html pair_thole.html pair_tri_lj.html +pair_ufm.html pair_vashishta.html pair_yukawa.html pair_yukawa_colloid.html diff --git a/doc/src/pairs.txt b/doc/src/pairs.txt index ec21b7a02e..d694aed8d7 100644 --- a/doc/src/pairs.txt +++ b/doc/src/pairs.txt @@ -100,6 +100,7 @@ Pair Styles :h1 pair_tersoff_zbl pair_thole pair_tri_lj + pair_ufm pair_vashishta pair_yukawa pair_yukawa_colloid From 85fdf9eaba2b94430f2c207c833bac35b95df64c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 2 Feb 2018 10:10:27 +0100 Subject: [PATCH 6/6] make links to papers unique across files --- doc/src/fix_ti_spring.txt | 10 +++++----- doc/src/pair_ufm.txt | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/src/fix_ti_spring.txt b/doc/src/fix_ti_spring.txt index afb1dcf8ff..191f9e7c6b 100644 --- a/doc/src/fix_ti_spring.txt +++ b/doc/src/fix_ti_spring.txt @@ -34,7 +34,7 @@ by performing a nonequilibrium thermodynamic integration between the solid of interest and an Einstein crystal. 
A detailed explanation of how to use this command and choose its parameters for optimal performance and accuracy is given in the paper by -"Freitas"_#Freitas. The paper also presents a short summary of the +"Freitas"_#Freitas1. The paper also presents a short summary of the theory of nonequilibrium thermodynamic integrations. The thermodynamic integration procedure is performed by rescaling the @@ -67,13 +67,13 @@ of lambda is kept equal to zero and the fix has no other effect on the dynamics of the system. The processes described above is known as nonequilibrium thermodynamic -integration and is has been shown ("Freitas"_#Freitas) to present a +integration and is has been shown ("Freitas"_#Freitas1) to present a much superior efficiency when compared to standard equilibrium methods. The reason why the switching it is made in both directions (potential to Einstein crystal and back) is to eliminate the dissipated heat due to the nonequilibrium process. Further details about nonequilibrium thermodynamic integration and its implementation -in LAMMPS is available in "Freitas"_#Freitas. +in LAMMPS is available in "Freitas"_#Freitas1. The {function} keyword allows the use of two different lambda paths. Option {1} results in a constant rate of change of lambda with @@ -94,7 +94,7 @@ thermodynamic integration. The use of option {2} is recommended since it results in better accuracy and less dissipation without any increase in computational resources cost. -NOTE: As described in "Freitas"_#Freitas, it is important to keep the +NOTE: As described in "Freitas"_#Freitas1, it is important to keep the center-of-mass fixed during the thermodynamic integration. A nonzero total velocity will result in divergences during the integration due to the fact that the atoms are 'attached' to their equilibrium @@ -156,7 +156,7 @@ The keyword default is function = 1. 
:line -:link(Freitas) +:link(Freitas1) [(Freitas)] Freitas, Asta, and de Koning, Computational Materials Science, 112, 333 (2016). diff --git a/doc/src/pair_ufm.txt b/doc/src/pair_ufm.txt index 2be35b0d4b..88a22864cc 100644 --- a/doc/src/pair_ufm.txt +++ b/doc/src/pair_ufm.txt @@ -62,7 +62,7 @@ of a run: variable prefactor equal ramp(10,100) fix 1 all adapt 1 pair ufm epsilon * * v_prefactor :pre -NOTE: The thermodynamic integration procedure can be performed with this potential using "fix adapt"_fix_adapt.html. This command will rescale the force on each atom by varying a scale variable, which always starts with value 1.0. The syntax is the same described above, however, changing epsilon to scale. A detailed explanation of how to use this command and perform nonequilibrium thermodynamic integration in LAMMPS is given in the paper by "(Freitas)"_#Freitas. +NOTE: The thermodynamic integration procedure can be performed with this potential using "fix adapt"_fix_adapt.html. This command will rescale the force on each atom by varying a scale variable, which always starts with value 1.0. The syntax is the same described above, however, changing epsilon to scale. A detailed explanation of how to use this command and perform nonequilibrium thermodynamic integration in LAMMPS is given in the paper by "(Freitas)"_#Freitas2. :line @@ -131,5 +131,5 @@ This pair style can only be used via the {pair} keyword of the [(Paula Leite2016)] Paula Leite , Freitas, Azevedo, and de Koning, J Chem Phys, 126, 044509 (2016). -:link(Freitas) -[(Freitas)] Freitas, Asta, and de Koning, Computational Materials Science, 112, 333 (2016). \ No newline at end of file +:link(Freitas2) +[(Freitas)] Freitas, Asta, and de Koning, Computational Materials Science, 112, 333 (2016).