PeTar
N-body code for collisional gravitational systems
soft_force.hpp
Go to the documentation of this file.
1 #pragma once
2 #ifdef INTRINSIC_K
4 #endif
5 #ifdef INTRINSIC_X86
7 #endif
8 
9 
10 // Neighbor search function
12  void operator () (const EPISoft * ep_i,
13  const PS::S32 n_ip,
14  const EPJSoft * ep_j,
15  const PS::S32 n_jp,
16  ForceSoft * force){
17  for(PS::S32 i=0; i<n_ip; i++){
18  const PS::F64vec xi = ep_i[i].pos;
19  PS::S32 n_ngb_i = 0;
20  for(PS::S32 j=0; j<n_jp; j++){
21  const PS::F64vec rij = xi - ep_j[j].pos;
22  const PS::F64 r2 = rij * rij;
23  const PS::F64 r_search = std::max(ep_i[i].r_search,ep_j[j].r_search);
24  if(r2 < r_search*r_search){
25 #ifdef SAVE_NEIGHBOR_ID_IN_FORCE_KERNEL
26  force[i].id_ngb[n_ngb_i & 0x3] = ep_j[j].id;
27 #endif
28  n_ngb_i++;
29  }
30  }
31  force[i].n_ngb = n_ngb_i;
32  }
33  }
34 };
35 
39  void operator () (const EPISoft * ep_i,
40  const PS::S32 n_ip,
41  const EPJSoft * ep_j,
42  const PS::S32 n_jp,
43  ForceSoft * force){
44  const PS::F64 eps2 = EPISoft::eps * EPISoft::eps;
45  const PS::F64 r_out2 = EPISoft::r_out*EPISoft::r_out;
47  for(PS::S32 i=0; i<n_ip; i++){
48  const PS::F64vec xi = ep_i[i].pos;
49  //PS::S64 id_i = ep_i[i].id;
50  PS::F64vec ai = 0.0;
51  PS::F64 poti = 0.0;
52  PS::S32 n_ngb_i = 0;
53  for(PS::S32 j=0; j<n_jp; j++){
54  //if(id_i == ep_j[j].id){
55  // n_ngb_i++;
56  // continue;
57  //}
58  const PS::F64vec rij = xi - ep_j[j].pos;
59  const PS::F64 r2 = rij * rij;
60  const PS::F64 r2_eps = r2 + eps2;
61  const PS::F64 r_search = std::max(ep_i[i].r_search,ep_j[j].r_search);
62  if(r2 < r_search*r_search){
63  n_ngb_i++;
64  }
65  const PS::F64 r2_tmp = (r2_eps > r_out2) ? r2_eps : r_out2;
66  const PS::F64 r_inv = 1.0/sqrt(r2_tmp);
67  const PS::F64 m_r = ep_j[j].mass * r_inv;
68  const PS::F64 m_r3 = m_r * r_inv * r_inv;
69  ai -= m_r3 * rij;
70  poti -= m_r;
71  }
72  //std::cerr<<"poti= "<<poti<<std::endl;
73  force[i].acc += G*ai;
74 #ifdef KDKDK_4TH
75  force[i].acorr = 0.0;
76 #endif
77  force[i].pot += G*poti;
78 #ifdef NAN_CHECK_DEBUG
79  assert(!std::isnan(ai[0]));
80  assert(!std::isnan(ai[1]));
81  assert(!std::isnan(ai[2]));
82  assert(!std::isnan(poti));
83 #endif
84  force[i].n_ngb = n_ngb_i;
85  }
86  }
87 };
88 
89 #ifdef KDKDK_4TH
90 struct CalcCorrectEpEpWithLinearCutoffNoSimd{
91  void operator () (const EPISoft * ep_i,
92  const PS::S32 n_ip,
93  const EPJSoft * ep_j,
94  const PS::S32 n_jp,
95  ForceSoft * force){
96  const PS::F64 eps2 = EPISoft::eps * EPISoft::eps;
97  const PS::F64 r_out2 = EPISoft::r_out*EPISoft::r_out;
98 
99  for(PS::S32 i=0; i<n_ip; i++){
100  PS::F64vec acorr = 0.0;
101  const PS::F64vec posi = ep_i[i].pos;
102  const PS::F64vec acci = ep_i[i].acc;
103  for(PS::S32 j=0; j<n_jp; j++){
104  const PS::F64vec dr = posi - ep_j[j].pos;
105  const PS::F64vec da = acci - ep_j[j].acc;
106  const PS::F64 r2 = dr * dr + eps2;
107  const PS::F64 drda = dr * da;
108  const PS::F64 r2_tmp = (r2 > r_out2) ? r2 : r_out2;
109  const PS::F64 r_inv = 1.0/sqrt(r2_tmp);
110  const PS::F64 r2_inv = r_inv*r_inv;
111  const PS::F64 m_r = ep_j[j].mass * r_inv;
112  const PS::F64 m_r3 = m_r * r2_inv;
113 
114  const PS::F64 alpha = 3.0 * drda * r2_inv;
115  acorr -= m_r3 * (da - alpha * dr);
116  }
117  //std::cerr<<"poti= "<<poti<<std::endl;
118  force[i].acorr += 2.0 * acorr;
119  force[i].acc = acci;
120  }
121  }
122 };
123 #endif
124 
126  template<class Tsp>
127  void operator () (const EPISoft * ep_i,
128  const PS::S32 n_ip,
129  const Tsp * sp_j,
130  const PS::S32 n_jp,
131  ForceSoft * force){
132  const PS::F64 eps2 = EPISoft::eps * EPISoft::eps;
133  const PS::F64 G = ForceSoft::grav_const;
134  for(PS::S32 i=0; i<n_ip; i++){
135  PS::F64vec xi = ep_i[i].pos;
136  PS::F64vec ai = 0.0;
137  PS::F64 poti = 0.0;
138  for(PS::S32 j=0; j<n_jp; j++){
139  PS::F64vec rij = xi - sp_j[j].getPos();
140  PS::F64 r3_inv = rij * rij + eps2;
141  PS::F64 r_inv = 1.0/sqrt(r3_inv);
142  r3_inv = r_inv * r_inv;
143  r_inv *= sp_j[j].getCharge();
144  r3_inv *= r_inv;
145  ai -= r3_inv * rij;
146  poti -= r_inv;
147  }
148  force[i].acc += G*ai;
149  force[i].pot += G*poti;
150 #ifdef NAN_CHECK_DEBUG
151  assert(!std::isnan(ai[0]));
152  assert(!std::isnan(ai[1]));
153  assert(!std::isnan(ai[2]));
154  assert(!std::isnan(poti));
155 #endif
156  }
157  }
158 };
159 
161  template<class Tsp>
162  void operator () (const EPISoft * ep_i,
163  const PS::S32 n_ip,
164  const Tsp * sp_j,
165  const PS::S32 n_jp,
166  ForceSoft * force){
167  const PS::F64 eps2 = EPISoft::eps * EPISoft::eps;
168  const PS::F64 G = ForceSoft::grav_const;
169 // assert(n_jp==0);
170  for(PS::S32 ip=0; ip<n_ip; ip++){
171  PS::F64vec xi = ep_i[ip].pos;
172  PS::F64vec ai = 0.0;
173  PS::F64 poti = 0.0;
174  for(PS::S32 jp=0; jp<n_jp; jp++){
175  PS::F64 mj = sp_j[jp].mass;
176  PS::F64vec xj= sp_j[jp].pos;
177  PS::F64vec rij= xi - xj;
178  PS::F64 r2 = rij * rij + eps2;
179  PS::F64mat qj = sp_j[jp].quad;
180  PS::F64 tr = qj.getTrace();
181  PS::F64vec qr( (qj.xx*rij.x + qj.xy*rij.y + qj.xz*rij.z),
182  (qj.yy*rij.y + qj.yz*rij.z + qj.xy*rij.x),
183  (qj.zz*rij.z + qj.xz*rij.x + qj.yz*rij.y) );
184  PS::F64 qrr = qr * rij;
185  PS::F64 r_inv = 1.0f/sqrt(r2);
186  PS::F64 r2_inv = r_inv * r_inv;
187  PS::F64 r3_inv = r2_inv * r_inv;
188  PS::F64 r5_inv = r2_inv * r3_inv * 1.5;
189  PS::F64 qrr_r5 = r5_inv * qrr;
190  PS::F64 qrr_r7 = r2_inv * qrr_r5;
191  PS::F64 A = mj*r3_inv - tr*r5_inv + 5*qrr_r7;
192  PS::F64 B = -2.0*r5_inv;
193  ai -= A*rij + B*qr;
194  poti -= mj*r_inv - 0.5*tr*r3_inv + qrr_r5;
195  }
196  force[ip].acc += G*ai;
197  force[ip].pot += G*poti;
198  }
199  }
200 };
201 
202 template<class Tpi, class Tpj>
204  void operator () (const Tpi * ep_i,
205  const PS::S32 n_ip,
206  const Tpj * ep_j,
207  const PS::S32 n_jp,
208  ForceSoft * force){
209  //const PS::F64 eps2 = EPISoft::eps * EPISoft::eps;
210  const PS::F64 eps2 = 0;
211  const PS::F64 G = ForceSoft::grav_const;
212  for(PS::S32 i=0; i<n_ip; i++){
213  PS::F64vec xi = ep_i[i].pos;
214  PS::F64vec ai = 0.0;
215  PS::F64 poti = 0.0;
216  for(PS::S32 j=0; j<n_jp; j++){
217  PS::F64vec rij = xi - ep_j[j].pos;
218  PS::F64 r3_inv = rij * rij + eps2;
219  PS::F64 r_inv = 1.0/sqrt(r3_inv);
220  r3_inv = r_inv * r_inv;
221  r_inv *= ep_j[j].mass;
222  r3_inv *= r_inv;
223  ai -= r3_inv * rij;
224  poti -= r_inv;
225  }
226  force[i].acc += G*ai;
227  force[i].pot += G*poti;
228 #ifdef NAN_CHECK_DEBUG
229  assert(!std::isnan(ai[0]));
230  assert(!std::isnan(ai[1]));
231  assert(!std::isnan(ai[2]));
232  assert(!std::isnan(poti));
233 #endif
234  }
235  }
236 };
237 
238 #ifdef USE_SIMD
239 struct SearchNeighborEpEpSimd{
240  void operator () (const EPISoft * ep_i,
241  const PS::S32 n_ip,
242  const EPJSoft * ep_j,
243  const PS::S32 n_jp,
244  ForceSoft * force){
245  #ifdef __HPC_ACE__
246  PhantomGrapeQuad pg;
247  #else
248  #if defined(CALC_EP_64bit) || defined(CALC_EP_MIX)
249  static thread_local PhantomGrapeQuad64Bit pg;
250  #else
251  static thread_local PhantomGrapeQuad pg;
252  #endif
253  #endif
254  if(n_ip > pg.NIMAX || n_jp > pg.NJMAX){
255  std::cout<<"ni= "<<n_ip<<" NIMAX= "<<pg.NIMAX<<" nj= "<<n_jp<<" NJMAX= "<<pg.NJMAX<<std::endl;
256  }
257  assert(n_ip<=pg.NIMAX);
258  assert(n_jp<=pg.NJMAX);
259  for(PS::S32 i=0; i<n_ip; i++){
260  const PS::F64vec pos_i = ep_i[i].getPos();
261  pg.set_xi_one(i, pos_i.x, pos_i.y, pos_i.z, ep_i[i].r_search);
262  }
263  PS::S32 loop_max = (n_jp-1) / PhantomGrapeQuad::NJMAX + 1;
264  for(PS::S32 loop=0; loop<loop_max; loop++){
265  const PS::S32 ih = PhantomGrapeQuad::NJMAX*loop;
266  const PS::S32 n_jp_tmp = ( (n_jp - ih) < PhantomGrapeQuad::NJMAX) ? (n_jp - ih) : PhantomGrapeQuad::NJMAX;
267  const PS::S32 it =ih + n_jp_tmp;
268  PS::S32 i_tmp = 0;
269  for(PS::S32 i=ih; i<it; i++, i_tmp++){
270  const PS::F64 m_j = ep_j[i].getCharge();
271  const PS::F64vec pos_j = ep_j[i].getPos();
272  pg.set_epj_one(i_tmp, pos_j.x, pos_j.y, pos_j.z, m_j, ep_j[i].r_search);
273 
274  }
275  pg.run_epj_for_neighbor_count(n_ip, n_jp_tmp);
276  for(PS::S32 i=0; i<n_ip; i++){
277  PS::F64 n_ngb = 0;
278  pg.accum_accp_one(i, n_ngb);
279  force[i].n_ngb += (PS::S32)(n_ngb*1.00001);
280  }
281  }
282  }
283 };
284 
285 template <class Tpi, class Tpj>
286 struct CalcForcePPSimd{
287  void operator () (const Tpi * ep_i,
288  const PS::S32 n_ip,
289  const Tpj * ep_j,
290  const PS::S32 n_jp,
291  ForceSoft * force){
292  const PS::F64 G = ForceSoft::grav_const;
293  PS::S32 ep_j_list[n_jp], n_jp_local=0;
294  for (PS::S32 i=0; i<n_jp; i++){
295  if(ep_j[i].mass>0) ep_j_list[n_jp_local++] = i;
296  }
297  #ifdef __HPC_ACE__
298  PhantomGrapeQuad pg;
299  #else
300  #if defined(CALC_EP_64bit) || defined(CALC_EP_MIX)
301  static thread_local PhantomGrapeQuad64Bit pg;
302  #else
303  static thread_local PhantomGrapeQuad pg;
304  #endif
305  #endif
306  if(n_ip > pg.NIMAX || n_jp > pg.NJMAX){
307  std::cout<<"ni= "<<n_ip<<" NIMAX= "<<pg.NIMAX<<" nj= "<<n_jp<<" NJMAX= "<<pg.NJMAX<<std::endl;
308  }
309  assert(n_ip<=pg.NIMAX);
310  assert(n_jp<=pg.NJMAX);
311  pg.set_eps2(0.0);
312  pg.set_r_crit2(0.0);
313  for(PS::S32 i=0; i<n_ip; i++){
314  const PS::F64vec pos_i = ep_i[i].pos;
315  pg.set_xi_one(i, pos_i.x, pos_i.y, pos_i.z, 0.0);
316  }
317  PS::S32 loop_max = (n_jp_local-1) / PhantomGrapeQuad::NJMAX + 1;
318  for(PS::S32 loop=0; loop<loop_max; loop++){
319  const PS::S32 ih = PhantomGrapeQuad::NJMAX*loop;
320  const PS::S32 n_jp_tmp = ( (n_jp_local - ih) < PhantomGrapeQuad::NJMAX) ? (n_jp_local - ih) : PhantomGrapeQuad::NJMAX;
321  const PS::S32 it =ih + n_jp_tmp;
322  PS::S32 i_tmp = 0;
323  for(PS::S32 i=ih; i<it; i++, i_tmp++){
324  const PS::S32 ij = ep_j_list[i];
325  const PS::F64 m_j = ep_j[ij].mass;
326  const PS::F64vec pos_j = ep_j[ij].pos;
327  pg.set_epj_one(i_tmp, pos_j.x, pos_j.y, pos_j.z, m_j, 0.0);
328 
329  }
330  pg.run_epj_for_p3t_with_linear_cutoff(n_ip, n_jp_tmp);
331  for(PS::S32 i=0; i<n_ip; i++){
332  PS::F64 p = 0;
333  PS::F64 a[3]= {0,0,0};
334  PS::F64 n_ngb = 0;
335  pg.accum_accp_one(i, a[0], a[1], a[2], p, n_ngb);
336  force[i].acc[0] += G*a[0];
337  force[i].acc[1] += G*a[1];
338  force[i].acc[2] += G*a[2];
339  force[i].pot += G*p;
340  force[i].n_ngb += (PS::S32)(n_ngb*1.00001);
341  }
342  }
343  }
344 };
345 
347 
350 struct CalcForceEpEpWithLinearCutoffSimd{
351  void operator () (const EPISoft * ep_i,
352  const PS::S32 n_ip,
353  const EPJSoft * ep_j,
354  const PS::S32 n_jp,
355  ForceSoft * force){
356  const PS::F64 eps2 = EPISoft::eps * EPISoft::eps;
357  const PS::F64 G = ForceSoft::grav_const;
358  PS::S32 ep_j_list[n_jp], n_jp_local=0;
359  PS::S32 ep_i_list[n_ip], n_ip_local=0;
360  for (PS::S32 i=0; i<n_jp; i++){
361  if(ep_j[i].mass>0) ep_j_list[n_jp_local++] = i;
362  }
363 // std::cerr<<"n_jp="<<n_jp<<" reduced n_jp="<<n_jp_local<<std::endl;
364 // const PS::F64 r_crit2 = EPJSoft::r_search * EPJSoft::r_search;
365  #ifdef __HPC_ACE__
366  PhantomGrapeQuad pg;
367  #else
368  #if defined(CALC_EP_64bit) || defined(CALC_EP_MIX)
369  static thread_local PhantomGrapeQuad64Bit pg;
370  #else
371  static thread_local PhantomGrapeQuad pg;
372  #endif
373  #endif
374  if(n_ip > pg.NIMAX || n_jp > pg.NJMAX){
375  std::cout<<"ni= "<<n_ip<<" NIMAX= "<<pg.NIMAX<<" nj= "<<n_jp<<" NJMAX= "<<pg.NJMAX<<std::endl;
376  }
377  assert(n_ip<=pg.NIMAX);
378  assert(n_jp<=pg.NJMAX);
379  pg.set_eps2(eps2);
380  pg.set_r_crit2(EPISoft::r_out*EPISoft::r_out);
381  for(PS::S32 i=0; i<n_ip; i++){
382  // remove the orbital sample for the force calculation
383  if (ep_i[i].type==1) {
384  ep_i_list[n_ip_local] = i;
385  const PS::F64vec pos_i = ep_i[i].getPos();
386  pg.set_xi_one(n_ip_local, pos_i.x, pos_i.y, pos_i.z, ep_i[i].r_search);
387  n_ip_local++;
388  }
389  }
390 // std::cerr<<"n_ip="<<n_ip<<" reduced n_ip="<<n_ip_local<<std::endl;
391  PS::S32 loop_max = (n_jp_local-1) / PhantomGrapeQuad::NJMAX + 1;
392  for(PS::S32 loop=0; loop<loop_max; loop++){
393  const PS::S32 ih = PhantomGrapeQuad::NJMAX*loop;
394  const PS::S32 n_jp_tmp = ( (n_jp_local - ih) < PhantomGrapeQuad::NJMAX) ? (n_jp_local - ih) : PhantomGrapeQuad::NJMAX;
395  const PS::S32 it =ih + n_jp_tmp;
396  PS::S32 i_tmp = 0;
397  for(PS::S32 i=ih; i<it; i++, i_tmp++){
398  const PS::S32 ij = ep_j_list[i];
399  const PS::F64 m_j = ep_j[ij].getCharge();
400  const PS::F64vec pos_j = ep_j[ij].getPos();
401  pg.set_epj_one(i_tmp, pos_j.x, pos_j.y, pos_j.z, m_j, ep_j[ij].r_search);
402 
403  }
404  pg.run_epj_for_p3t_with_linear_cutoff(n_ip, n_jp_tmp);
405  for(PS::S32 k=0; k<n_ip_local; k++){
406  PS::S32 i=ep_i_list[k];
407  PS::F64 p = 0;
408  PS::F64 a[3]= {0,0,0};
409  PS::F64 n_ngb = 0;
410  pg.accum_accp_one(k, a[0], a[1], a[2], p, n_ngb);
411 #ifdef NAN_CHECK_DEBUG
412  assert(!std::isnan(a[0]));
413  assert(!std::isnan(a[1]));
414  assert(!std::isnan(a[2]));
415  assert(!std::isnan(p));
416 #endif
417  force[i].acc[0] += G*a[0];
418  force[i].acc[1] += G*a[1];
419  force[i].acc[2] += G*a[2];
420  force[i].pot += G*p;
421  force[i].n_ngb += (PS::S32)(n_ngb*1.00001);
422  }
423  }
424  }
425 };
426 
427 struct CalcForceEpSpMonoSimd{
428  template<class Tsp>
429  void operator () (const EPISoft * ep_i,
430  const PS::S32 n_ip,
431  //const PS::SPJMonopoleScatter * sp_j,
432  const Tsp * sp_j,
433  const PS::S32 n_jp,
434  ForceSoft * force){
435  const PS::F64 eps2 = EPISoft::eps * EPISoft::eps;
436  const PS::F64 G = ForceSoft::grav_const;
437  PS::S32 ep_i_list[n_ip], n_ip_local=0;
438 #ifdef __HPC_ACE__
439  PhantomGrapeQuad pg;
440 #else
441  #if defined(CALC_EP_64bit)
442  static thread_local PhantomGrapeQuad64Bit pg;
443  #else
444  static thread_local PhantomGrapeQuad pg;
445  #endif
446 #endif
447  assert(n_ip<=pg.NIMAX);
448  assert(n_jp<=pg.NJMAX);
449  pg.set_eps2(eps2);
450  for(PS::S32 i=0; i<n_ip; i++){
451  // remove the orbital sample for the force calculation
452  if (ep_i[i].type==1) {
453  ep_i_list[n_ip_local] = i;
454  const PS::F64vec pos_i = ep_i[i].getPos();
455  pg.set_xi_one(n_ip_local, pos_i.x, pos_i.y, pos_i.z, 0.0);
456  n_ip_local++;
457  }
458  }
459  PS::S32 loop_max = (n_jp-1) / PhantomGrapeQuad::NJMAX + 1;
460  for(PS::S32 loop=0; loop<loop_max; loop++){
461  const PS::S32 ih = PhantomGrapeQuad::NJMAX*loop;
462  const PS::S32 n_jp_tmp = ( (n_jp - ih) < PhantomGrapeQuad::NJMAX) ? (n_jp - ih) : PhantomGrapeQuad::NJMAX;
463  const PS::S32 it = ih + n_jp_tmp;
464  PS::S32 i_tmp = 0;
465  for(PS::S32 i=ih; i<it; i++, i_tmp++){
466  const PS::F64 m_j = sp_j[i].getCharge();
467  const PS::F64vec pos_j = sp_j[i].getPos();
468  pg.set_epj_one(i_tmp, pos_j.x, pos_j.y, pos_j.z, m_j, 0.0);
469  }
470  pg.run_epj(n_ip, n_jp_tmp);
471  for(PS::S32 k=0; k<n_ip_local; k++){
472  PS::S32 i=ep_i_list[k];
473  PS::F64 p = 0;
474  PS::F64 a[3]= {0,0,0};
475  pg.accum_accp_one(k, a[0], a[1], a[2], p);
476  force[i].acc[0] += G*a[0];
477  force[i].acc[1] += G*a[1];
478  force[i].acc[2] += G*a[2];
479  force[i].pot += G*p;
480 #ifdef NAN_CHECK_DEBUG
481  assert(!std::isnan(a[0]));
482  assert(!std::isnan(a[1]));
483  assert(!std::isnan(a[2]));
484  assert(!std::isnan(p));
485 #endif
486  }
487  }
488  }
489 };
490 
491 struct CalcForceEpSpQuadSimd{
492  template<class Tsp>
493  void operator () (const EPISoft * ep_i,
494  const PS::S32 n_ip,
495  const Tsp * sp_j,
496  const PS::S32 n_jp,
497  ForceSoft * force){
498  const PS::F64 eps2 = EPISoft::eps * EPISoft::eps;
499  const PS::F64 G = ForceSoft::grav_const;
500  PS::S32 ep_i_list[n_ip], n_ip_local=0;
501  #ifdef __HPC_ACE__
502  PhantomGrapeQuad pg;
503  #else
504  #if defined(CALC_EP_64bit)
505  static thread_local PhantomGrapeQuad64Bit pg;
506  #else
507  static thread_local PhantomGrapeQuad pg;
508  #endif
509  #endif
510  assert(n_ip<=pg.NIMAX);
511  assert(n_jp<=pg.NJMAX);
512  pg.set_eps2(eps2);
513  for(PS::S32 i=0; i<n_ip; i++){
514  // remove the orbital sample for the force calculation
515  if (ep_i[i].type==1) {
516  ep_i_list[n_ip_local] = i;
517  const PS::F64vec pos_i = ep_i[i].getPos();
518  pg.set_xi_one(n_ip_local, pos_i.x, pos_i.y, pos_i.z, 0.0);
519  n_ip_local++;
520  }
521  }
522  PS::S32 loop_max = (n_jp-1) / PhantomGrapeQuad::NJMAX + 1;
523  for(PS::S32 loop=0; loop<loop_max; loop++){
524  const PS::S32 ih = PhantomGrapeQuad::NJMAX*loop;
525  const PS::S32 n_jp_tmp = ( (n_jp - ih) < PhantomGrapeQuad::NJMAX) ? (n_jp - ih) : PhantomGrapeQuad::NJMAX;
526  const PS::S32 it = ih + n_jp_tmp;
527  PS::S32 i_tmp = 0;
528  for(PS::S32 i=ih; i<it; i++, i_tmp++){
529  const PS::F64 m_j = sp_j[i].getCharge();
530  const PS::F64vec pos_j = sp_j[i].getPos();
531  const PS::F64mat q = sp_j[i].quad;
532  pg.set_spj_one(i, pos_j.x, pos_j.y, pos_j.z, m_j,
533  q.xx, q.yy, q.zz, q.xy, q.yz, q.xz);
534  }
535  pg.run_spj(n_ip, n_jp_tmp);
536  for(PS::S32 k=0; k<n_ip_local; k++){
537  PS::S32 i=ep_i_list[k];
538  PS::F64 p = 0;
539  PS::F64 a[3]= {0,0,0};
540  pg.accum_accp_one(k, a[0], a[1], a[2], p);
541 #ifdef NAN_CHECK_DEBUG
542  assert(!std::isnan(a[0]));
543  assert(!std::isnan(a[1]));
544  assert(!std::isnan(a[2]));
545  assert(!std::isnan(p));
546 #endif
547  force[i].acc[0] += G*a[0];
548  force[i].acc[1] += G*a[1];
549  force[i].acc[2] += G*a[2];
550  force[i].pot += G*p;
551  }
552  }
553  }
554 };
555 #endif
CalcForceEpSpMonoNoSimd::operator()
void operator()(const EPISoft *ep_i, const PS::S32 n_ip, const Tsp *sp_j, const PS::S32 n_jp, ForceSoft *force)
Definition: soft_force.hpp:127
EPISoft
Definition: soft_ptcl.hpp:271
FPSoft
Definition: soft_ptcl.hpp:26
phantomquad_for_p3t_k.hpp
CalcForceEpEpWithLinearCutoffNoSimd::operator()
void operator()(const EPISoft *ep_i, const PS::S32 n_ip, const EPJSoft *ep_j, const PS::S32 n_jp, ForceSoft *force)
Definition: soft_force.hpp:39
phantomquad_for_p3t_x86.hpp
ForceSoft::clear
void clear()
gravitational constant
Definition: soft_ptcl.hpp:16
EPJSoft::mass
PS::F64 mass
Definition: soft_ptcl.hpp:314
PIKG::S32
int32_t S32
Definition: pikg_vector.hpp:24
EPJSoft::getPos
PS::F64vec getPos() const
Definition: soft_ptcl.hpp:346
ParticleDistributionGenerator::makePlummerModel
static void makePlummerModel(const double mass_glb, const long long int n_glb, const long long int n_loc, double *&mass, PS::F64vec *&pos, PS::F64vec *&vel, const double eng=-0.25, const int seed=0)
Definition: particle_distribution_generator.hpp:173
soft_ptcl.hpp
ForceSoft
Definition: soft_ptcl.hpp:4
PIKG::F64vec
Vector3< F64 > F64vec
Definition: pikg_vector.hpp:167
CalcForceEpSpQuadNoSimd
Definition: soft_force.hpp:160
EPISoft::pos
PS::F64vec pos
Definition: soft_ptcl.hpp:274
PhantomGrapeQuad::NJMAX
@ NJMAX
Definition: phantomquad_for_p3t_k.hpp:8
EPISoft::getPos
PS::F64vec getPos() const
Definition: soft_ptcl.hpp:283
CalcForceEpEpWithLinearCutoffNoSimd
FORCE FUNCTOR.
Definition: soft_force.hpp:38
RetrieveForceCUDA
PS::S32 RetrieveForceCUDA(const PS::S32 tag, const PS::S32 n_walk, const PS::S32 *ni, ForceSoft **force)
PIKG::F64
double F64
Definition: pikg_vector.hpp:17
ForceSoft::pot
PS::F64 pot
soft acceleration (c.m.: averaged force from orbital particles; tensor: c.m. is substracted)
Definition: soft_ptcl.hpp:7
particle_distribution_generator.hpp
EPISoft::copyFromFP
void copyFromFP(const FPSoft &fp)
Definition: soft_ptcl.hpp:284
force_gpu_cuda.hpp
SearchNeighborEpEpNoSimd
Definition: soft_force.hpp:11
ParticleBase::pos
PS::F64vec pos
Definition: particle_base.hpp:24
ParticleBase::vel
PS::F64vec vel
Definition: particle_base.hpp:25
EPISoft::r_out
static PS::F64 r_out
Definition: soft_ptcl.hpp:282
io.hpp
SearchNeighborEpEpNoSimd::operator()
void operator()(const EPISoft *ep_i, const PS::S32 n_ip, const EPJSoft *ep_j, const PS::S32 n_jp, ForceSoft *force)
Definition: soft_force.hpp:12
EPJSoft::getCharge
PS::F64 getCharge() const
Definition: soft_ptcl.hpp:348
CalcForceEpSpQuadNoSimd::operator()
void operator()(const EPISoft *ep_i, const PS::S32 n_ip, const Tsp *sp_j, const PS::S32 n_jp, ForceSoft *force)
Definition: soft_force.hpp:162
main
int main(int argc, char **argv)
Definition: simd_test.cxx:59
CalcForcePPNoSimd
Definition: soft_force.hpp:203
Ptcl::changeover
ChangeOver changeover
Definition: ptcl.hpp:41
EPJSoft
Definition: soft_ptcl.hpp:311
CalcForcePPNoSimd::operator()
void operator()(const Tpi *ep_i, const PS::S32 n_ip, const Tpj *ep_j, const PS::S32 n_jp, ForceSoft *force)
Definition: soft_force.hpp:204
CalcForceWithLinearCutoffCUDA
Definition: force_gpu_cuda.hpp:137
EPJSoft::pos
PS::F64vec pos
Definition: soft_ptcl.hpp:315
force_fugaku.hpp
EPJSoft::id
PS::S64 id
Definition: soft_ptcl.hpp:313
EPISoft::eps
static PS::F64 eps
Definition: soft_ptcl.hpp:281
ParticleBase::mass
PS::F64 mass
Definition: particle_base.hpp:23
PIKG::max
T max(const Vector3< T > &v)
Definition: pikg_vector.hpp:143
GroupDataDeliver::artificial
ArtificialParticleInformation artificial
Definition: ptcl.hpp:18
static_variables.hpp
ArtificialParticleInformation::setParticleTypeToSingle
void setParticleTypeToSingle()
set particle type to single
Definition: artificial_particles.hpp:78
setSpj
void setSpj(const PS::F64 N, SPJSoft &sp)
Definition: simd_test.cxx:44
ChangeOver::setR
void setR(const Float &_m_fac, const Float &_r_in, const Float &_r_out)
set r_in and r_out for changeover function
Definition: changeover.hpp:44
ForceSoft::acc
PS::F64vec acc
Definition: soft_ptcl.hpp:6
Ptcl::id
PS::S64 id
Definition: ptcl.hpp:39
Ptcl::group_data
GroupDataDeliver group_data
Definition: ptcl.hpp:40
PhantomGrapeQuad::NIMAX
@ NIMAX
Definition: phantomquad_for_p3t_k.hpp:7
CalcForceEpSpMonoNoSimd
Definition: soft_force.hpp:125
ForceSoft::n_ngb
PS::S64 n_ngb
full potential
Definition: soft_ptcl.hpp:14
soft_force.hpp
ForceSoft::grav_const
static PS::F64 grav_const
neighbor number+1
Definition: soft_ptcl.hpp:15
SPJSoft
#define SPJSoft
Definition: simd_test.cxx:37
PhantomGrapeQuad
Definition: phantomquad_for_p3t_k.hpp:4