PeTar
N-body code for collisional gravitational systems
profile.hpp
Go to the documentation of this file.
1 #pragma once
2 #include<particle_simulator.hpp>
3 #include<iomanip>
4 #include<iostream>
5 //#include<fstream>
6 #include<map>
7 
8 #define PROFILE_PRINT_WIDTH 13
9 
10 /*
11 template<class Tdinfo, class Tsystem, class Ttree>
12 class Profile{
13 
14 private:
15  Tdinfo * dinfo_;
16  Tsystem * system_;
17  Ttree * tree_;
18  PS::F64 n_op_ep_ep_;
19  PS::F64 n_op_ep_sp_;
20  PS::F64 flops_per_core_;
21 public:
22 
23  static void dumpTimeProfile(const PS::TimeProfile & tp, const PS::TimeProfile & tp_max, const PS::S32 rank_max[], std::ostream & fout){
24  PS::S32 id = 0;
25  fout<<"collect_sample_particle= "<<tp.collect_sample_particle<<", max= "<<tp_max.collect_sample_particle<<", rank= "<<rank_max[id++]<<std::endl;
26  fout<<"decompose_domain= "<<tp.decompose_domain<<", max= "<<tp_max.decompose_domain<<", rank= "<<rank_max[id++]<<std::endl;
27  fout<<"exchange_particle= "<<tp.exchange_particle<<", max= "<<tp_max.exchange_particle<<", rank= "<<rank_max[id++]<<std::endl;
28  fout<<"set_particle_local_tree= "<<tp.set_particle_local_tree<<", max= "<<tp_max.set_particle_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
29  fout<<"set_root_cell= "<<tp.set_root_cell<<", max= "<<tp_max.set_root_cell<<", rank= "<<rank_max[id++]<<std::endl;
30  fout<<"make_local_tree= "<<tp.make_local_tree<<", max= "<<tp_max.make_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
31  fout<<" morton_sort_local_tree= "<<tp.morton_sort_local_tree<<", max= "<<tp_max.morton_sort_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
32  fout<<" link_cell_local_tree= "<<tp.link_cell_local_tree<<", max= "<<tp_max.link_cell_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
33  fout<<"calc_moment_local_tree= "<<tp.calc_moment_local_tree<<", max= "<<tp_max.calc_moment_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
34  fout<<"make_LET_1st= "<<tp.make_LET_1st<<", max= "<<tp_max.make_LET_1st<<", rank= "<<rank_max[id++]<<std::endl;
35  fout<<"exchange_LET_1st= "<<tp.exchange_LET_1st<<", max= "<<tp_max.exchange_LET_1st<<", rank= "<<rank_max[id++]<<std::endl;
36  fout<<"make_LET_2nd= "<<tp.make_LET_2nd<<", max= "<<tp_max.make_LET_2nd<<", rank= "<<rank_max[id++]<<std::endl;
37  fout<<"exchange_LET_2nd= "<<tp.exchange_LET_2nd<<", max= "<<tp_max.exchange_LET_2nd<<", rank= "<<rank_max[id++]<<std::endl;
38  fout<<"set_particle_global_tree= "<<tp.set_particle_global_tree<<", max= "<<tp_max.set_particle_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
39  fout<<" morton_sort_global_tree= "<<tp.morton_sort_global_tree<<", max= "<<tp_max.morton_sort_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
40  fout<<" link_cell_global_tree= "<<tp.link_cell_global_tree<<", max= "<<tp_max.link_cell_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
41  fout<<"make_global_tree= "<<tp.make_global_tree<<", max= "<<tp_max.make_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
42  fout<<"calc_moment_global_tree= "<<tp.calc_moment_global_tree<<", max= "<<tp_max.calc_moment_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
43  fout<<"calc_force = "<<tp.calc_force<<", max= "<<tp_max.calc_force<<", rank= "<<rank_max[id++]<<std::endl;
44  fout<<std::endl;
45  }
46 
47  static void dumpTimeProfile0(const PS::TimeProfile & tp, const PS::TimeProfile & tp_max, const PS::S32 rank_max[], std::ostream & fout){
48  PS::S32 id = 0;
49  fout<<"collect_sample_particle= "<<tp.collect_sample_particle<<", max= "<<tp_max.collect_sample_particle<<", rank= "<<rank_max[id++]<<std::endl;
50  fout<<"decompose_domain= "<<tp.decompose_domain<<", max= "<<tp_max.decompose_domain<<", rank= "<<rank_max[id++]<<std::endl;
51  fout<<std::endl;
52  }
53 
54  static void dumpTimeProfile1(const PS::TimeProfile & tp, const PS::TimeProfile & tp_max, const PS::S32 rank_max[], std::ostream & fout){
55  PS::S32 id = 2;
56  fout<<"exchange_particle= "<<tp.exchange_particle<<", max= "<<tp_max.exchange_particle<<", rank= "<<rank_max[id++]<<std::endl;
57  fout<<std::endl;
58  }
59 
60  static void dumpTimeProfile2(const PS::TimeProfile & tp, const PS::TimeProfile & tp_max, const PS::S32 rank_max[], std::ostream & fout){
61  PS::S32 id = 3;
62  fout<<"set_particle_local_tree= "<<tp.set_particle_local_tree<<", max= "<<tp_max.set_particle_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
63  fout<<"set_root_cell= "<<tp.set_root_cell<<", max= "<<tp_max.set_root_cell<<", rank= "<<rank_max[id++]<<std::endl;
64  fout<<"make_local_tree= "<<tp.make_local_tree<<", max= "<<tp_max.make_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
65  fout<<" morton_sort_local_tree= "<<tp.morton_sort_local_tree<<", max= "<<tp_max.morton_sort_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
66  fout<<" link_cell_local_tree= "<<tp.link_cell_local_tree<<", max= "<<tp_max.link_cell_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
67  fout<<"calc_moment_local_tree= "<<tp.calc_moment_local_tree<<", max= "<<tp_max.calc_moment_local_tree<<", rank= "<<rank_max[id++]<<std::endl;
68  fout<<"make_LET_1st= "<<tp.make_LET_1st<<", max= "<<tp_max.make_LET_1st<<", rank= "<<rank_max[id++]<<std::endl;
69  fout<<"exchange_LET_1st= "<<tp.exchange_LET_1st<<", max= "<<tp_max.exchange_LET_1st<<", rank= "<<rank_max[id++]<<std::endl;
70 
71  fout<<" exchange_LET_1st__a2a_n= "<<tp.exchange_LET_1st__a2a_n<<", max= "<<tp_max.exchange_LET_1st__a2a_n<<", rank= "<<rank_max[id++]<<std::endl;
72  fout<<" exchange_LET_1st__a2a_ep= "<<tp.exchange_LET_1st__a2a_ep<<", max= "<<tp_max.exchange_LET_1st__a2a_ep<<", rank= "<<rank_max[id++]<<std::endl;
73  fout<<" exchange_LET_1st__a2a_sp= "<<tp.exchange_LET_1st__a2a_sp<<", max= "<<tp_max.exchange_LET_1st__a2a_sp<<", rank= "<<rank_max[id++]<<std::endl;
74 
75  fout<<"make_LET_2nd= "<<tp.make_LET_2nd<<", max= "<<tp_max.make_LET_2nd<<", rank= "<<rank_max[id++]<<std::endl;
76  fout<<"exchange_LET_2nd= "<<tp.exchange_LET_2nd<<", max= "<<tp_max.exchange_LET_2nd<<", rank= "<<rank_max[id++]<<std::endl;
77  fout<<"set_particle_global_tree= "<<tp.set_particle_global_tree<<", max= "<<tp_max.set_particle_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
78  fout<<"make_global_tree= "<<tp.make_global_tree<<", max= "<<tp_max.make_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
79  fout<<" morton_sort_global_tree= "<<tp.morton_sort_global_tree<<", max= "<<tp_max.morton_sort_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
80  fout<<" link_cell_global_tree= "<<tp.link_cell_global_tree<<", max= "<<tp_max.link_cell_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
81  fout<<"calc_moment_global_tree= "<<tp.calc_moment_global_tree<<", max= "<<tp_max.calc_moment_global_tree<<", rank= "<<rank_max[id++]<<std::endl;
82  fout<<"calc_force = "<<tp.calc_force<<", max= "<<tp_max.calc_force<<", rank= "<<rank_max[id++]<<std::endl;
83  fout<<std::endl;
84  }
85 
86  static void getTimeProfileMax(const PS::TimeProfile & tp, const PS::S32 rank, PS::TimeProfile & tp_max, PS::S32 rank_max[]){
87  PS::S32 id = 0;
88  PS::Comm::getMaxValue(tp.collect_sample_particle, rank, tp_max.collect_sample_particle, rank_max[id++]);
89  PS::Comm::getMaxValue(tp.decompose_domain, rank, tp_max.decompose_domain, rank_max[id++]);
90 
91  PS::Comm::getMaxValue(tp.exchange_particle, rank, tp_max.exchange_particle, rank_max[id++]);
92 
93  PS::Comm::getMaxValue(tp.set_particle_local_tree, rank, tp_max.set_particle_local_tree, rank_max[id++]);
94  PS::Comm::getMaxValue(tp.set_root_cell, rank, tp_max.set_root_cell, rank_max[id++]);
95  PS::Comm::getMaxValue(tp.make_local_tree, rank, tp_max.make_local_tree, rank_max[id++]);
96  PS::Comm::getMaxValue(tp.morton_sort_local_tree, rank, tp_max.morton_sort_local_tree, rank_max[id++]);
97  PS::Comm::getMaxValue(tp.link_cell_local_tree, rank, tp_max.link_cell_local_tree, rank_max[id++]);
98  PS::Comm::getMaxValue(tp.calc_moment_local_tree, rank, tp_max.calc_moment_local_tree, rank_max[id++]);
99  PS::Comm::getMaxValue(tp.make_LET_1st, rank, tp_max.make_LET_1st, rank_max[id++]);
100  PS::Comm::getMaxValue(tp.exchange_LET_1st, rank, tp_max.exchange_LET_1st, rank_max[id++]);
101 
102  PS::Comm::getMaxValue(tp.exchange_LET_1st__a2a_n, rank, tp_max.exchange_LET_1st__a2a_n, rank_max[id++]);
103  PS::Comm::getMaxValue(tp.exchange_LET_1st__a2a_ep, rank, tp_max.exchange_LET_1st__a2a_ep, rank_max[id++]);
104  PS::Comm::getMaxValue(tp.exchange_LET_1st__a2a_sp, rank, tp_max.exchange_LET_1st__a2a_sp, rank_max[id++]);
105 
106  PS::Comm::getMaxValue(tp.make_LET_2nd, rank, tp_max.make_LET_2nd, rank_max[id++]);
107  PS::Comm::getMaxValue(tp.exchange_LET_2nd, rank, tp_max.exchange_LET_2nd, rank_max[id++]);
108  PS::Comm::getMaxValue(tp.set_particle_global_tree, rank, tp_max.set_particle_global_tree, rank_max[id++]);
109  PS::Comm::getMaxValue(tp.make_global_tree, rank, tp_max.make_global_tree, rank_max[id++]);
110  PS::Comm::getMaxValue(tp.morton_sort_global_tree, rank, tp_max.morton_sort_global_tree, rank_max[id++]);
111  PS::Comm::getMaxValue(tp.link_cell_global_tree, rank, tp_max.link_cell_global_tree, rank_max[id++]);
112  PS::Comm::getMaxValue(tp.calc_moment_global_tree, rank, tp_max.calc_moment_global_tree, rank_max[id++]);
113  PS::Comm::getMaxValue(tp.calc_force, rank, tp_max.calc_force, rank_max[id++]);
114  }
115 
116  Profile(Tdinfo * _dinfo,
117  Tsystem * _system,
118  Ttree * _tree,
119  const PS::F64 _n_op_ep_ep,
120  const PS::F64 _n_op_ep_sp,
121  const PS::F64 _flops_per_core){
122  dinfo_ = _dinfo;
123  system_ = _system;
124  tree_ = _tree;
125  n_op_ep_ep_ = _n_op_ep_ep;
126  n_op_ep_sp_ = _n_op_ep_sp;
127  flops_per_core_ = _flops_per_core;
128  }
129 
130  void dump(std::ofstream & fout, const PS::F64 time_sys, const PS::S64 n_loop, const PS::F64 wtime_tot){
131  //static PS::S64 n_loop_old = 0;
132  //const PS::S64 dn_loop = n_loop - n_loop_old;
133  const PS::S64 dn_loop = n_loop;
134  PS::TimeProfile tp_dinfo = dinfo_->getTimeProfile();
135  PS::TimeProfile tp_system = system_->getTimeProfile();
136  PS::TimeProfile tp_tree = tree_->getTimeProfile();
137  PS::TimeProfile tp_dinfo_max, tp_system_max, tp_tree_max;
138  const PS::S32 n_profile_max = 100;
139  PS::S32 rank_dinfo_max[n_profile_max], rank_system_max[n_profile_max], rank_tree_max[n_profile_max];
140  getTimeProfileMax(tp_dinfo, PS::Comm::getRank(), tp_dinfo_max, rank_dinfo_max);
141  getTimeProfileMax(tp_system, PS::Comm::getRank(), tp_system_max, rank_system_max);
142  getTimeProfileMax(tp_tree, PS::Comm::getRank(), tp_tree_max, rank_tree_max);
143  PS::CountT n_int_ep_ep = tree_->getNumberOfInteractionEPEPGlobal();
144  PS::CountT n_int_ep_sp = tree_->getNumberOfInteractionEPSPGlobal();
145  PS::CountT n_op_tot = n_int_ep_ep * n_op_ep_ep_ + n_int_ep_sp * n_op_ep_sp_;
146  fout<<"soft part break down"<<std::endl;
147  fout<<"time_sys= "<<time_sys<<" n_loop= "<<n_loop<<std::endl;
148  fout<<"n_loc= "<<system_->getNumberOfParticleLocal()<<" n_glb= "<<system_->getNumberOfParticleGlobal()<<std::endl;
149  fout<<"speed= "<<(PS::F64)(n_op_tot)/(wtime_tot)*1e-12<<"[Tflops]"<<std::endl;
150  fout<<"PS::Comm::getNumberOfThread()= "<<PS::Comm::getNumberOfThread()<<std::endl;
151  fout<<"efficiency= "<<(PS::F64)(n_op_tot)/(wtime_tot)/(flops_per_core_*PS::Comm::getNumberOfProc()*PS::Comm::getNumberOfThread())<<std::endl;
152  fout<<"wtime_tot= "<<wtime_tot<<std::endl;
153  fout<<"n_op_tot= "<<n_op_tot<<std::endl;
154  //timer.dump(fout);
155  dumpTimeProfile0(tp_dinfo, tp_dinfo_max, rank_dinfo_max, fout);
156  dumpTimeProfile1(tp_system, tp_system_max, rank_system_max, fout);
157  fout<<"n_int_ep_ep= "<<n_int_ep_ep<<" n_int_ep_sp= "<<n_int_ep_sp<<std::endl;
158  fout<<"ni_ave= "<<(PS::F64)(system_->getNumberOfParticleGlobal() * dn_loop) / tree_->getNumberOfWalkGlobal()
159  <<" nj_ave(EP-EP)= "<<(PS::F64)(n_int_ep_ep) / (system_->getNumberOfParticleGlobal() * dn_loop)
160  <<" nj_ave(EP-SP)= "<<(PS::F64)(n_int_ep_sp) / (system_->getNumberOfParticleGlobal() * dn_loop)<<std::endl;
161  dumpTimeProfile2(tp_tree, tp_tree_max, rank_tree_max, fout);
162  //n_loop_old = n_loop;
163  }
164 
165  void clear(){
166  dinfo_->clearTimeProfile();
167  system_->clearTimeProfile();
168  tree_->clearCounterAll();
169  //tree_->clearTimeProfile();
170  //tree_->clearNumberOfInteraction();
171  }
172 
173 };
174 */
175 
176 struct Tprofile{
178  PS::F64 tbar; // barrier time, measure before barrier near end()
179  const char* name;
180 
181  Tprofile(const char* _name): time(0.0), tbar(0.0), name(_name) {}
182 
183  void start(){
184  time -= PS::GetWtime();
185  }
186 
187  void barrier(){
188  tbar -= PS::GetWtime();
189  }
190 
191  void end(){
192  tbar += PS::GetWtime();
193  time += PS::GetWtime();
194  }
195 
196  void print(std::ostream & fout, const PS::S32 divider=1){
197  fout<<name<<": "<<time/divider<<" "<<tbar/divider<<std::endl;
198  }
199 
200  void dump(std::ostream & fout, const PS::S32 divider=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
201  fout<<std::setw(width)<<time/divider;
202  }
203 
204  void dumpBarrier(std::ostream & fout, const PS::S32 divider=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const{
205  fout<<std::setw(width)<<tbar/divider;
206  }
207 
209  return PS::Comm::getMaxValue(time-tbar);
210  }
211 
213  return PS::Comm::getMinValue(time-tbar);
214  }
215 
216  void dumpName(std::ostream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
217  fout<<std::setw(width)<<name;
218  }
219 
220  void dumpBarrierName(std::ostream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
221  std::string sname("Bar_");
222  sname += name;
223  fout<<std::setw(width)<<sname;
224  }
225 
226  void reset() {
227  time = 0.0;
228  tbar = 0.0;
229  }
230 
231 };
232 
233 struct NumCounter{
235  const char* name;
236 
237  NumCounter(const char* _name): n(0), name(_name) {}
238 
240  (this->n)++;
241  return *this;
242  }
243 
245  this->n += _n;
246  return *this;
247  }
248 
250  this->n = _n;
251  return *this;
252  }
253 
254  void print(std::ostream & fout, const PS::S32 divider=1) const{
255  fout<<name<<": "<<((divider==1)?n:(PS::F64)n/divider)<<std::endl;
256  }
257 
258  void dump(std::ostream & fout, const PS::S32 divider=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
259  fout<<std::setw(width)<<((divider==1)?n:(PS::F64)n/divider);
260  }
261 
262  void dumpName(std::ostream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
263  fout<<std::setw(width)<<name;
264  }
265 };
266 
267 class FDPSProfile: public PS::TimeProfile {
268 public:
269  FDPSProfile &operator+=(const PS::TimeProfile _tp) {
270  *(TimeProfile*)this = *(TimeProfile*)this + _tp;
271  return *this;
272  }
273 
274  void dumpName(std::ostream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
275  fout<<std::setw(width)<<"Sample_ptcl"
276  <<std::setw(width)<<"Domain_deco"
277  <<std::setw(width)<<"Ex_ptcl "
278  <<std::setw(width)<<"Set_ptcl_LT"
279  <<std::setw(width)<<"Set_ptcl_GT"
280  <<std::setw(width)<<"Make_LT "
281  <<std::setw(width)<<"Make_GT "
282  <<std::setw(width)<<"SetRootCell"
283  <<std::setw(width)<<"Calc_force "
284  <<std::setw(width)<<"Calc_mom_LT"
285  <<std::setw(width)<<"Calc_mom_GT"
286  <<std::setw(width)<<"Make_LET_1 "
287  <<std::setw(width)<<"Make_LET_2 "
288  <<std::setw(width)<<"Ex_LET_1 "
289  <<std::setw(width)<<"Ex_LET_2 "
290  <<std::setw(width)<<"Write_back ";
291  }
292 
293  void dump(std::ostream & fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH){
294  fout<<std::setw(width)<<collect_sample_particle /n_loop
295  <<std::setw(width)<<decompose_domain /n_loop
296  <<std::setw(width)<<exchange_particle /n_loop
297  <<std::setw(width)<<set_particle_local_tree /n_loop
298  <<std::setw(width)<<set_particle_global_tree/n_loop
299  <<std::setw(width)<<make_local_tree /n_loop
300  <<std::setw(width)<<make_global_tree /n_loop
301  <<std::setw(width)<<set_root_cell /n_loop
302  <<std::setw(width)<<calc_force /n_loop
303  <<std::setw(width)<<calc_moment_local_tree /n_loop
304  <<std::setw(width)<<calc_moment_global_tree /n_loop
305  <<std::setw(width)<<make_LET_1st /n_loop
306  <<std::setw(width)<<make_LET_2nd /n_loop
307  <<std::setw(width)<<exchange_LET_1st /n_loop
308  <<std::setw(width)<<exchange_LET_2nd /n_loop
309  <<std::setw(width)<<write_back /n_loop;
310  }
311 };
312 
314 public:
332 
333  SysProfile(): total (Tprofile("Total ")),
334  hard_single (Tprofile("PP_single ")),
335  hard_isolated (Tprofile("PP_cluster ")),
336  hard_connected(Tprofile("PP_cross ")),
337  hard_interrupt(Tprofile("PP_intrpt* ")),
338  tree_nb (Tprofile("Tree_NB ")),
339  tree_soft (Tprofile("Tree_Force ")),
340  force_correct (Tprofile("Force_corr ")),
341  kick (Tprofile("Kick ")),
342  search_cluster(Tprofile("FindCluster")),
343  create_group (Tprofile("CreateGroup")),
344  domain (Tprofile("Domain_deco")),
345  exchange (Tprofile("Ex_Ptcl ")),
346  output (Tprofile("Output ")),
347  status (Tprofile("Status ")),
348  other (Tprofile("Other ")),
349  n_profile(16) {}
350 
351  void print(std::ostream & fout, const PS::F64 time_sys, const PS::S64 n_loop=1){
352  fout<<"Time: "<<time_sys<<std::endl;
353 
354  for(PS::S32 i=0; i<n_profile; i++) {
355  Tprofile* iptr = (Tprofile*)this+i;
356  iptr->print(fout, n_loop);
357  }
358  }
359 
360 
361  void dump(std::ostream & fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
362  for(PS::S32 i=0; i<n_profile; i++) {
363  Tprofile* iptr = (Tprofile*)this+i;
364  iptr->dump(fout, n_loop, width);
365  }
366  }
367 
368  void dumpName(std::ostream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
369  for(PS::S32 i=0; i<n_profile; i++) {
370  Tprofile* iptr = (Tprofile*)this+i;
371  iptr->dumpName(fout, width);
372  }
373  }
374 
375  void dumpBarrier(std::ostream & fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
376  for(PS::S32 i=0; i<n_profile; i++) {
377  Tprofile* iptr = (Tprofile*)this+i;
378  iptr->dumpBarrier(fout, n_loop, width);
379  }
380  }
381 
382  void dumpBarrierName(std::ostream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
383  for(PS::S32 i=0; i<n_profile; i++) {
384  Tprofile* iptr = (Tprofile*)this+i;
385  iptr->dumpBarrierName(fout, width);
386  }
387  }
388 
390  SysProfile pmax;
391  for(PS::S32 i=0; i<n_profile; i++) {
392  Tprofile* iptr = (Tprofile*)this+i;
393  Tprofile* pptr = (Tprofile*)&pmax+i;
394  pptr->time = iptr->getMax();
395  }
396  return pmax;
397  }
398 
400  SysProfile pmin;
401  for(PS::S32 i=0; i<n_profile; i++) {
402  Tprofile* iptr = (Tprofile*)this+i;
403  Tprofile* pptr = (Tprofile*)&pmin+i;
404  pptr->time = iptr->getMin();
405  }
406  return pmin;
407  }
408 
409  void clear(){
410  for(PS::S32 i=0; i<n_profile; i++) {
411  Tprofile* iptr = (Tprofile*)this+i;
412  iptr->reset();
413  }
414  }
415 
416 };
417 
418 class SysCounts{
419 public:
434  //NumCounter ARC_step_group;
436  std::map<PS::S32,PS::S32> n_cluster;
437 
438  SysCounts(): hard_single (NumCounter("PP_single ")),
439  hard_isolated (NumCounter("PP_cluster ")),
440  hard_connected (NumCounter("PP_cross ")),
441  hard_interrupt (NumCounter("PP_intrpt* ")),
442  cluster_isolated (NumCounter("Cluster ")),
443  cluster_connected(NumCounter("Cross ")),
444  ARC_substep_sum (NumCounter("AR_step_sum")),
445  ARC_tsyn_step_sum(NumCounter("AR_tsyn_sum")),
446  ARC_n_groups (NumCounter("AR_group_N ")),
447  ARC_n_groups_iso (NumCounter("Iso_group_N")),
448  H4_step_sum (NumCounter("H4_step_sum")),
449  n_neighbor_zero (NumCounter("H4_no_NB ")),
450  ep_ep_interact (NumCounter("Ep-Ep_sum ")),
451  ep_sp_interact (NumCounter("Ep-Sp_sum ")),
452  //ARC_step_group (NumCounter("ARC step per group")),
453  n_counter(14) {}
454 
455  void clusterCount(const PS::S32 n, const PS::S32 ntimes=1) {
456  if (n_cluster.count(n)) n_cluster[n] += ntimes;
457  else n_cluster[n]=ntimes;
458  }
459 
461  n_cluster.clear();
462  }
463 
464  void getherClusterCount(int* n, int* count, const long unsigned int size) {
465  assert(size==n_cluster.size());
466  int index=0;
467  for(auto i=n_cluster.begin(); i!=n_cluster.end(); ++i) {
468  n[index]=i->first;
469  count[index] = i->second;
470  index++;
471  }
472  }
473 
474  void copyClusterCount(SysCounts& n_count) {
475  for(auto i=n_count.n_cluster.begin(); i!=n_count.n_cluster.end(); ++i) {
476  n_cluster[i->first] = i->second;
477  }
478  }
479 
480  void addClusterCount(SysCounts& n_count) {
481  for(auto i=n_count.n_cluster.begin(); i!=n_count.n_cluster.end(); ++i) {
482  if(n_cluster.count(i->first)) n_cluster[i->first] += i->second;
483  else n_cluster[i->first] = i->second;
484  }
485  }
486 
487  void printHist(std::ostream & fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const {
488  for(auto i=n_cluster.begin(); i!=n_cluster.end(); ++i) fout<<std::setw(width)<<i->first;
489  fout<<std::endl;
490  for(auto i=n_cluster.begin(); i!=n_cluster.end(); ++i) fout<<std::setw(width)<<i->second/((n_loop==1)?1:(PS::F64)n_loop);
491  fout<<std::endl;
492  }
493 
494  //void dump(std::ofstream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH, const PS::S64 n_loop=1){
495  // for(PS::S32 i=0; i<n_counter; i++) {
496  // NumCounter* iptr = (NumCounter*)this+i;
497  // iptr->dump(fout, width, n_loop);
498  // }
499  //}
500 
501  //void dumpHist(std::ofstream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH, const PS::S64 n_loop=1){
502  // for(auto i=n_cluster.begin(); i!=n_cluster.end(); ++i)
503  // fout<<std::setw(width)<<i->first<<std::setw(width)<<i->second/((n_loop==1)?1:(PS::F64)n_loop);
504  //}
505  //
506  //void dumpName(std::ofstream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) {
507  // for(PS::S32 i=0; i<n_counter; i++) {
508  // NumCounter* iptr = (NumCounter*)this+i;
509  // iptr->dumpName(fout, width);
510  // }
511  //}
512 
513  void dump(std::ostream & fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const{
514  for(PS::S32 i=0; i<n_counter; i++) {
515  NumCounter* iptr = (NumCounter*)this+i;
516  iptr->dump(fout, n_loop, width);
517  }
518  }
519 
520 
521  void dumpHist(std::ostream & fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const{
522  for(auto i=n_cluster.begin(); i!=n_cluster.end(); ++i)
523  fout<<std::setw(width)<<i->first<<std::setw(width)<<i->second/((n_loop==1)?1:(PS::F64)n_loop);
524  }
525 
526  void dumpName(std::ostream & fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const{
527  for(PS::S32 i=0; i<n_counter; i++) {
528  NumCounter* iptr = (NumCounter*)this+i;
529  iptr->dumpName(fout, width);
530  }
531  }
532 
533  void clear() {
534  for(PS::S32 i=0; i<n_counter; i++) {
535  NumCounter* iptr = (NumCounter*)this+i;
536  *iptr = 0;
537  }
538  n_cluster.clear();
539  }
540 };
SysCounts::clear
void clear()
Definition: profile.hpp:533
NumCounter
Definition: profile.hpp:233
Tprofile::getMax
PS::F64 getMax()
Definition: profile.hpp:208
SysProfile::tree_nb
Tprofile tree_nb
Definition: profile.hpp:320
Tprofile::dump
void dump(std::ostream &fout, const PS::S32 divider=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:200
FDPSProfile::dump
void dump(std::ostream &fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH)
Definition: profile.hpp:293
SysProfile::output
Tprofile output
Definition: profile.hpp:328
NumCounter::operator=
NumCounter & operator=(const PS::S64 _n)
Definition: profile.hpp:249
SysCounts::getherClusterCount
void getherClusterCount(int *n, int *count, const long unsigned int size)
Definition: profile.hpp:464
SysCounts::addClusterCount
void addClusterCount(SysCounts &n_count)
Definition: profile.hpp:480
PIKG::S32
int32_t S32
Definition: pikg_vector.hpp:24
SysProfile::search_cluster
Tprofile search_cluster
Definition: profile.hpp:324
SysCounts::cluster_isolated
NumCounter cluster_isolated
Definition: profile.hpp:424
SysCounts::clusterCount
void clusterCount(const PS::S32 n, const PS::S32 ntimes=1)
Definition: profile.hpp:455
FDPSProfile::operator+=
FDPSProfile & operator+=(const PS::TimeProfile _tp)
Definition: profile.hpp:269
SysCounts::SysCounts
SysCounts()
Definition: profile.hpp:438
SysCounts::hard_connected
NumCounter hard_connected
Definition: profile.hpp:422
Tprofile::dumpName
void dumpName(std::ostream &fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:216
Tprofile::end
void end()
Definition: profile.hpp:191
SysCounts::ARC_n_groups_iso
NumCounter ARC_n_groups_iso
Definition: profile.hpp:429
SysCounts::ep_sp_interact
NumCounter ep_sp_interact
Definition: profile.hpp:433
Tprofile::barrier
void barrier()
Definition: profile.hpp:187
SysProfile::dumpBarrier
void dumpBarrier(std::ostream &fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:375
PIKG::F64
double F64
Definition: pikg_vector.hpp:17
SysProfile
Definition: profile.hpp:313
SysProfile::getMin
SysProfile getMin()
Definition: profile.hpp:399
SysProfile::hard_isolated
Tprofile hard_isolated
Definition: profile.hpp:317
NumCounter::operator++
NumCounter & operator++()
Definition: profile.hpp:239
SysCounts::copyClusterCount
void copyClusterCount(SysCounts &n_count)
Definition: profile.hpp:474
SysProfile::force_correct
Tprofile force_correct
Definition: profile.hpp:322
SysProfile::dump
void dump(std::ostream &fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:361
SysProfile::tree_soft
Tprofile tree_soft
Definition: profile.hpp:321
PIKG::S64
int64_t S64
Definition: pikg_vector.hpp:23
SysCounts::ep_ep_interact
NumCounter ep_ep_interact
Definition: profile.hpp:432
Tprofile::dumpBarrier
void dumpBarrier(std::ostream &fout, const PS::S32 divider=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:204
SysCounts::hard_interrupt
NumCounter hard_interrupt
Definition: profile.hpp:423
SysProfile::SysProfile
SysProfile()
Definition: profile.hpp:333
SysProfile::kick
Tprofile kick
Definition: profile.hpp:323
SysProfile::total
Tprofile total
Definition: profile.hpp:315
SysProfile::domain
Tprofile domain
Definition: profile.hpp:326
SysCounts::n_cluster
std::map< PS::S32, PS::S32 > n_cluster
Histogram of number of particles in clusters.
Definition: profile.hpp:436
SysProfile::n_profile
const PS::S32 n_profile
Definition: profile.hpp:331
SysCounts::n_counter
const PS::S32 n_counter
Definition: profile.hpp:435
Tprofile::dumpBarrierName
void dumpBarrierName(std::ostream &fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:220
NumCounter::n
PS::S64 n
Definition: profile.hpp:234
Tprofile::name
const char * name
Definition: profile.hpp:179
Tprofile::print
void print(std::ostream &fout, const PS::S32 divider=1)
Definition: profile.hpp:196
NumCounter::dumpName
void dumpName(std::ostream &fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:262
SysProfile::create_group
Tprofile create_group
Definition: profile.hpp:325
SysCounts::dumpHist
void dumpHist(std::ostream &fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:521
Tprofile::tbar
PS::F64 tbar
Definition: profile.hpp:178
SysProfile::hard_connected
Tprofile hard_connected
Definition: profile.hpp:318
SysProfile::dumpName
void dumpName(std::ostream &fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:368
SysProfile::print
void print(std::ostream &fout, const PS::F64 time_sys, const PS::S64 n_loop=1)
Definition: profile.hpp:351
SysProfile::clear
void clear()
Definition: profile.hpp:409
Tprofile::reset
void reset()
Definition: profile.hpp:226
NumCounter::dump
void dump(std::ostream &fout, const PS::S32 divider=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:258
SysProfile::getMax
SysProfile getMax()
Definition: profile.hpp:389
SysProfile::hard_interrupt
Tprofile hard_interrupt
Definition: profile.hpp:319
FDPSProfile
Definition: profile.hpp:267
SysProfile::other
Tprofile other
Definition: profile.hpp:330
NumCounter::name
const char * name
Definition: profile.hpp:235
Tprofile::Tprofile
Tprofile(const char *_name)
Definition: profile.hpp:181
SysCounts::cluster_connected
NumCounter cluster_connected
Definition: profile.hpp:425
Tprofile
Definition: profile.hpp:176
SysCounts::hard_single
NumCounter hard_single
Definition: profile.hpp:420
SysCounts::hard_isolated
NumCounter hard_isolated
Definition: profile.hpp:421
SysProfile::exchange
Tprofile exchange
Definition: profile.hpp:327
SysProfile::dumpBarrierName
void dumpBarrierName(std::ostream &fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:382
SysCounts::clearClusterCount
void clearClusterCount()
Definition: profile.hpp:460
Tprofile::getMin
PS::F64 getMin()
Definition: profile.hpp:212
NumCounter::operator+=
NumCounter & operator+=(const PS::S64 _n)
Definition: profile.hpp:244
FDPSProfile::dumpName
void dumpName(std::ostream &fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:274
SysCounts::dumpName
void dumpName(std::ostream &fout, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:526
NumCounter::print
void print(std::ostream &fout, const PS::S32 divider=1) const
Definition: profile.hpp:254
SysCounts::ARC_substep_sum
NumCounter ARC_substep_sum
Definition: profile.hpp:426
NumCounter::NumCounter
NumCounter(const char *_name)
Definition: profile.hpp:237
PROFILE_PRINT_WIDTH
#define PROFILE_PRINT_WIDTH
Definition: profile.hpp:8
Tprofile::time
PS::F64 time
Definition: profile.hpp:177
SysCounts::H4_step_sum
NumCounter H4_step_sum
Definition: profile.hpp:430
SysProfile::hard_single
Tprofile hard_single
Definition: profile.hpp:316
SysCounts::printHist
void printHist(std::ostream &fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:487
SysCounts::dump
void dump(std::ostream &fout, const PS::S64 n_loop=1, const PS::S32 width=PROFILE_PRINT_WIDTH) const
Definition: profile.hpp:513
SysProfile::status
Tprofile status
Definition: profile.hpp:329
SysCounts
Definition: profile.hpp:418
SysCounts::n_neighbor_zero
NumCounter n_neighbor_zero
Definition: profile.hpp:431
SysCounts::ARC_tsyn_step_sum
NumCounter ARC_tsyn_step_sum
Definition: profile.hpp:427
SysCounts::ARC_n_groups
NumCounter ARC_n_groups
Definition: profile.hpp:428
Tprofile::start
void start()
Definition: profile.hpp:183