PeTar
N-body code for collisional gravitational systems
force_gpu_cuda.hpp
Go to the documentation of this file.
1 #pragma once
2 #include <iostream>
3 #include <particle_simulator.hpp>
4 #include "soft_ptcl.hpp"
5 
6 #ifdef GPU_PROFILE
7 #include "profile.hpp"
8 extern struct GPUProfile{
9 public:
10  Tprofile copy;
11  Tprofile send;
12  Tprofile recv;
13  Tprofile calc;
14  const PS::S32 n_profile;
15 
16  GPUProfile():
17  copy (Tprofile("copy ")),
18  send (Tprofile("send ")),
19  recv (Tprofile("receive ")),
20  calc (Tprofile("calc_force ")),
21  n_profile(4) {}
22 
23  void print(std::ostream & fout, const PS::F64 time_sys, const PS::S64 n_loop=1){
24  fout<<"Time: "<<time_sys<<std::endl;
25 
26  for(PS::S32 i=0; i<n_profile; i++) {
27  Tprofile* iptr = (Tprofile*)this+i;
28  iptr->print(fout, n_loop);
29  }
30  }
31 
32  void dump(std::ostream & fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
33  for(PS::S32 i=0; i<n_profile; i++) {
34  Tprofile* iptr = (Tprofile*)this+i;
35  iptr->dump(fout, n_loop, width);
36  }
37  }
38 
39  void dumpName(std::ostream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) {
40  for(PS::S32 i=0; i<n_profile; i++) {
41  Tprofile* iptr = (Tprofile*)this+i;
42  iptr->dumpName(fout, width);
43  }
44  }
45 
46  void clear(){
47  for(PS::S32 i=0; i<n_profile; i++) {
48  Tprofile* iptr = (Tprofile*)this+i;
49  iptr->reset();
50  }
51  }
52 
53 } gpu_profile;
54 
55 extern struct GPUCounter{
56 public:
57  NumCounter n_walk;
58  NumCounter n_epi;
59  NumCounter n_epj;
60  NumCounter n_spj;
61  NumCounter n_call;
62  const PS::S32 n_counter;
63 
64  GPUCounter():
65  n_walk (NumCounter("n_walk ")),
66  n_epi (NumCounter("n_epi ")),
67  n_epj (NumCounter("n_epj ")),
68  n_spj (NumCounter("n_spj ")),
69  n_call (NumCounter("n_call ")),
70  n_counter(5) {}
71 
72  void dump(std::ostream & fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const{
73  for(PS::S32 i=0; i<n_counter; i++) {
74  NumCounter* iptr = (NumCounter*)this+i;
75  iptr->dump(fout, n_loop, width);
76  }
77  }
78  void dumpName(std::ostream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const{
79  for(PS::S32 i=0; i<n_counter; i++) {
80  NumCounter* iptr = (NumCounter*)this+i;
81  iptr->dumpName(fout, width);
82  }
83  }
84 
85  void clear() {
86  for(PS::S32 i=0; i<n_counter; i++) {
87  NumCounter* iptr = (NumCounter*)this+i;
88  *iptr = 0;
89  }
90  }
91 
92 } gpu_counter;
93 #endif
94 
95 #ifdef USE_QUAD
96 #define SPJSoft PS::SPJQuadrupoleInAndOut
97 #else
98 #define SPJSoft PS::SPJMonopoleInAndOut
99 #endif
100 
101 #ifdef PARTICLE_SIMULATOR_GPU_MULIT_WALK_INDEX
102 
103 struct CalcForceWithLinearCutoffCUDAMultiWalk{
104  PS::S32 my_rank;
105  PS::F64 eps2;
106  PS::F64 rcut2;
107  PS::F64 G;
108 
109  CalcForceWithLinearCutoffCUDAMultiWalk(){}
110 
111  CalcForceWithLinearCutoffCUDAMultiWalk(PS::S32 _rank, PS::F64 _eps2, PS::F64 _rcut2, PS::F64 _G): my_rank(_rank), eps2(_eps2), rcut2(_rcut2), G(_G) {}
112 
113  void initialize(PS::S32 _rank, PS::F64 _eps2, PS::F64 _rcut2, PS::F64 _G){
114  my_rank = _rank;
115  eps2 = _eps2;
116  rcut2 = _rcut2;
117  G = _G;
118  }
119 
120  PS::S32 operator()(const PS::S32 tag,
121  const PS::S32 n_walk,
122  const EPISoft ** epi,
123  const PS::S32 * n_epi,
124  const PS::S32 ** id_epj,
125  const PS::S32 * n_epj,
126  const PS::S32 ** id_spj,
127  const PS::S32 * n_spj,
128  const EPJSoft * epj,
129  const PS::S32 n_epj_tot,
130  const SPJSoft * spj,
131  const PS::S32 n_spj_tot,
132  const bool send_flag);
133 };
134 
135 #else
136 
142 
144 
145  CalcForceWithLinearCutoffCUDA(PS::S32 _rank, PS::F64 _eps2, PS::F64 _rcut2, PS::F64 _G): my_rank(_rank), eps2(_eps2), rcut2(_rcut2), G(_G) {}
146 
147  void initialize(PS::S32 _rank, PS::F64 _eps2, PS::F64 _rcut2, PS::F64 _G){
148  my_rank = _rank;
149  eps2 = _eps2;
150  rcut2 = _rcut2;
151  G = _G;
152  }
153 
154  PS::S32 operator()(const PS::S32 tag,
155  const PS::S32 n_walk,
156  const EPISoft *epi[],
157  const PS::S32 n_epi[],
158  const EPJSoft *epj[],
159  const PS::S32 n_epj[],
160  const SPJSoft *spj[],
161  const PS::S32 n_spj[]);
162 };
163 #endif
164 
166  const PS::S32 n_walk,
167  const PS::S32 * ni,
168  ForceSoft ** force);
169 
CalcForceWithLinearCutoffCUDA::CalcForceWithLinearCutoffCUDA
CalcForceWithLinearCutoffCUDA()
Definition: force_gpu_cuda.hpp:143
EPISoft
Definition: soft_ptcl.hpp:271
CalcForceWithLinearCutoffCUDA::initialize
void initialize(PS::S32 _rank, PS::F64 _eps2, PS::F64 _rcut2, PS::F64 _G)
Definition: force_gpu_cuda.hpp:147
NumCounter
Definition: profile.hpp:233
Tprofile::dump
void dump(std::ostream &fout, const PS::S32 divider=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:200
PIKG::S32
int32_t S32
Definition: pikg_vector.hpp:24
soft_ptcl.hpp
ForceSoft
Definition: soft_ptcl.hpp:4
Tprofile::dumpName
void dumpName(std::ostream &fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:216
SPJSoft
#define SPJSoft
Definition: force_gpu_cuda.hpp:98
RetrieveForceCUDA
PS::S32 RetrieveForceCUDA(const PS::S32 tag, const PS::S32 n_walk, const PS::S32 *ni, ForceSoft **force)
PIKG::F64
double F64
Definition: pikg_vector.hpp:17
CalcForceWithLinearCutoffCUDA::G
PS::F64 G
Definition: force_gpu_cuda.hpp:141
PIKG::S64
int64_t S64
Definition: pikg_vector.hpp:23
profile.hpp
CalcForceWithLinearCutoffCUDA::eps2
PS::F64 eps2
Definition: force_gpu_cuda.hpp:139
Tprofile::print
void print(std::ostream &fout, const PS::S32 divider=1)
Definition: profile.hpp:196
NumCounter::dumpName
void dumpName(std::ostream &fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:262
CalcForceWithLinearCutoffCUDA::my_rank
PS::S32 my_rank
Definition: force_gpu_cuda.hpp:138
Tprofile::reset
void reset()
Definition: profile.hpp:226
EPJSoft
Definition: soft_ptcl.hpp:311
NumCounter::dump
void dump(std::ostream &fout, const PS::S32 divider=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:258
CalcForceWithLinearCutoffCUDA
Definition: force_gpu_cuda.hpp:137
Tprofile
Definition: profile.hpp:176
CalcForceWithLinearCutoffCUDA::rcut2
PS::F64 rcut2
Definition: force_gpu_cuda.hpp:140
CalcForceWithLinearCutoffCUDA::CalcForceWithLinearCutoffCUDA
CalcForceWithLinearCutoffCUDA(PS::S32 _rank, PS::F64 _eps2, PS::F64 _rcut2, PS::F64 _G)
Definition: force_gpu_cuda.hpp:145
PROFILE_PRINT_WIDTH
#define PROFILE_PRINT_WIDTH
Definition: profile.hpp:8
CalcForceWithLinearCutoffCUDA::operator()
PS::S32 operator()(const PS::S32 tag, const PS::S32 n_walk, const EPISoft *epi[], const PS::S32 n_epi[], const EPJSoft *epj[], const PS::S32 n_epj[], const SPJSoft *spj[], const PS::S32 n_spj[])