MODFLOW 6  version 6.7.0.dev0
USGS Modular Hydrologic Model
Profiler.f90
Go to the documentation of this file.
2  use kindmodule, only: i4b, dp, lgp
6  implicit none
7  private
8 
9  ! constants for memory allocation
10  integer(I4B), parameter :: max_nr_timed_sections = 75
11  integer(I4B), public, parameter :: len_section_title = 128
12 
13  ! data structure to store measurements for a section
14  type, private :: measuredsectiontype
15  character(len=LEN_SECTION_TITLE) :: title !< title to identify timed section in log
16  real(dp) :: walltime !< walltime spent in section
17  integer(I4B) :: count !< number of times section was entered
18  integer(I4B) :: status !< =1 means section timer started, =0 otherwise
19  integer(I4B) :: parent_id !< id of parent, or 0 when root
20  type(stlvecint) :: children !< ids of children
21  end type measuredsectiontype
22 
23  !> @brief A public type for profiling performance in the application.
24  !! The ProfilerType is used to measure and record the performance of various
25  !! parts of the application. It provides mechanisms to start, stop, and
26  !< report on the performance metrics collected during execution.
27  type, public :: profilertype
28  ! handles for the global simulation structure (with no simulation objects to store them)
29  integer(I4B) :: tmr_run !< handle to timed section "Run"
30  integer(I4B) :: tmr_init !< handle to timed section "Initialize"
31  integer(I4B) :: tmr_update !< handle to timed section "Update"
32  integer(I4B) :: tmr_finalize !< handle to timed section "Finalize"
33  integer(I4B) :: tmr_prep_tstp !< handle to timed section "Prepare time step"
34  integer(I4B) :: tmr_do_tstp !< handle to timed section "Do time step"
35  integer(I4B) :: tmr_final_tstp !< handle to timed section "Finalize time step"
36  integer(I4B) :: tmr_output !< handle to timed section "Write output"
37  integer(I4B) :: tmr_nc_export !< handle to timed section "NetCDF export"
38  ! private
39  integer(I4B), private :: iout !< output unit number, typically simulation listing file
40  integer(I4B), private :: pr_option !< 0 = NONE, 1 = SUMMARY, 2 = DETAIL
41  integer(I4B), private :: nr_sections !< number of sections
42  integer(I4B), private, dimension(3) :: top_three !< top three leaf sections based on walltime
43  integer(I4B), private :: max_title_len !< maximum title length
44  integer(I4B), private :: root_id !< currently only one root section is supported, this is the id
45  type(measuredsectiontype), dimension(:), pointer :: all_sections => null() !< all timed sections (up to MAX_NR_TIMED_SECTIONS)
46  type(stlstackint) :: callstack !< call stack of section ids
47  contains
48  procedure :: initialize
49  procedure :: add_section
50  procedure :: start
51  procedure :: stop
52  procedure :: print
53  procedure :: destroy
54  procedure :: is_initialized
55  procedure :: set_print_option
56  ! private
57  procedure, private :: print_section
58  procedure, private :: print_total
59  procedure, private :: aggregate_walltime
60  procedure, private :: aggregate_counts
61  procedure, private :: largest_title_length
62  procedure, private :: sort_by_walltime
63  end type profilertype
64 
65  type(profilertype), public :: g_prof !< the global timer object (to reduce trivial lines of code)
66 
67 contains
68 
69  !< @brief Initialize the CPU timer object
70  !<
71  subroutine initialize(this)
72  class(profilertype) :: this
73  ! local
74  integer(I4B) :: i
75 
76  this%tmr_run = -1
77  this%tmr_init = -1
78  this%tmr_update = -1
79  this%tmr_finalize = -1
80  this%tmr_prep_tstp = -1
81  this%tmr_do_tstp = -1
82  this%tmr_final_tstp = -1
83  this%tmr_output = -1
84  this%tmr_nc_export = -1
85 
86  call this%callstack%init()
87 
88  allocate (this%all_sections(max_nr_timed_sections))
89  do i = 1, max_nr_timed_sections
90  this%all_sections(i)%title = "undefined"
91  this%all_sections(i)%status = 0
92  this%all_sections(i)%walltime = dzero
93  this%all_sections(i)%count = 0
94  this%all_sections(i)%parent_id = 0
95  call this%all_sections(i)%children%init()
96  end do
97 
98  this%nr_sections = 0
99  this%root_id = 0
100  this%top_three = [0, 0, 0]
101 
102  end subroutine initialize
103 
104  !> @brief Add a new timed section to the tree,
105  !! passing the parent id will add it as a child
106  !< in the tree
107  function add_section(this, title, parent_id) result(section_id)
108  use simmodule, only: ustop
109  class(profilertype) :: this
110  character(len=*) :: title
111  integer(I4B) :: parent_id
112  integer(I4B) :: section_id
113 
114  ! increment to new section id
115  this%nr_sections = this%nr_sections + 1
116  section_id = this%nr_sections
117  if (section_id > size(this%all_sections)) then
118  write (*, *) "Internal error: Too many profiled sections, "&
119  &"increase MAX_NR_TIMED_SECTIONS"
120  call ustop()
121  end if
122 
123  ! initialize new section
124  this%all_sections(section_id)%title = title
125  this%all_sections(section_id)%walltime = dzero
126  this%all_sections(section_id)%status = 0
127 
128  ! if parent, otherwise root section
129  if (parent_id > 0) then
130  ! add child to parent
131  this%all_sections(section_id)%parent_id = parent_id
132  call this%all_sections(parent_id)%children%push_back(section_id)
133  else
134  ! this is the root, assume there's only one!
135  this%all_sections(section_id)%parent_id = 0
136  this%root_id = section_id
137  end if
138 
139  end function add_section
140 
141  !> @brief Start section timing, add when not exist yet (i.e. when id < 1)
142  !<
143  subroutine start(this, title, section_id)
144  class(profilertype) :: this
145  character(len=*) :: title
146  integer(I4B) :: section_id
147  ! local
148  integer(I4B) :: parent_id
149  real(DP) :: start_time
150  type(measuredsectiontype), pointer :: section
151 
152  call cpu_time(start_time)
153 
154  if (section_id == -1) then
155  ! add section if not exist
156  parent_id = 0 ! root
157  if (this%callstack%size() > 0) then
158  parent_id = this%callstack%top()
159  end if
160  section_id = this%add_section(title, parent_id)
161  end if
162  call this%callstack%push(section_id)
163 
164  section => this%all_sections(section_id)
165  section%count = section%count + 1
166  section%status = 1
167  section%walltime = section%walltime - start_time
168 
169  end subroutine start
170 
171  subroutine stop(this, section_id)
172  class(profilertype) :: this
173  integer(I4B) :: section_id
174  ! local
175  real(DP) :: end_time
176  type(measuredsectiontype), pointer :: section
177 
178  call cpu_time(end_time)
179 
180  ! nett result (c.f. start(...)) is adding (dt = end_time - start_time)
181  section => this%all_sections(section_id)
182  section%status = 0
183  section%walltime = section%walltime + end_time
184 
185  ! pop from call stack
186  call this%callstack%pop()
187 
188  end subroutine stop
189 
190  subroutine print(this, output_unit)
191  class(profilertype) :: this
192  integer(I4B), intent(in) :: output_unit
193  ! local
194  integer(I4B) :: level, i, top_idx
195  integer(I4B), dimension(:), allocatable :: sorted_idxs
196 
197  this%iout = output_unit
198  if (this%pr_option == 0) return
199 
200  ! get top three leaf sections based on walltime
201  top_idx = 1
202  sorted_idxs = (/(i, i=1, this%nr_sections)/)
203  call this%sort_by_walltime(sorted_idxs)
204  do i = 1, this%nr_sections
205  if (this%all_sections(sorted_idxs(i))%children%size == 0) then ! leaf node
206  if (top_idx > 3) exit
207  this%top_three(top_idx) = sorted_idxs(i)
208  top_idx = top_idx + 1
209  end if
210  end do
211 
212  this%max_title_len = this%largest_title_length()
213 
214  if (this%pr_option > 1) then
215  ! print timing call stack
216  level = 0
217  write (this%iout, '(/1x,a/)') &
218  repeat('-', 18)//" Profiler: Call Stack "//repeat('-', 18)
219  call this%print_section(this%root_id, level)
220  end if
221 
222  ! print walltime per category from substring (if exist)
223  ! note: the sections containing the substring should not be nested,
224  ! otherwise the walltime will be counted multiple times
225  write (this%iout, '(1x,a/)') &
226  repeat('-', 20)//" Profiler: Totals "//repeat('-', 20)
227  call this%print_total("Formulate")
228  call this%print_total("Linear solve")
229  call this%print_total("Calculate flows")
230  call this%print_total("Calculate budgets")
231  call this%print_total("Write output")
232  call this%print_total("Parallel Solution")
233  call this%print_total("MPI_WaitAll")
234  write (this%iout, '(/1x,a/)') &
235  repeat('-', 22)//" End Profiler "//repeat('-', 22)
236 
237  end subroutine print
238 
239  recursive subroutine print_section(this, section_id, level)
240  use arrayhandlersmodule, only: ifind
241  class(profilertype) :: this
242  integer(I4B) :: section_id
243  integer(I4B) :: level
244  ! local
245  integer(I4B) :: i, new_level, nr_padding, idx_top
246  real(dp) :: percent
247  type(measuredsectiontype), pointer :: section
248  character(len=:), allocatable :: title_padded
249  character(len=LINELENGTH) :: top_marker
250 
251  section => this%all_sections(section_id)
252 
253  ! calculate percentage
254  percent = 1.0_dp
255  if (section%parent_id /= 0) then
256  percent = section%walltime / this%all_sections(this%root_id)%walltime
257  end if
258  percent = percent * 100.0_dp
259 
260  ! determine if section should be marked as top three
261  top_marker = ""
262  idx_top = ifind(this%top_three, section_id)
263  if (idx_top > 0) then
264  nr_padding = max(0, 32 - level * 4)
265  write (top_marker, '(a,i0)') repeat(" ", nr_padding)//"<== #", idx_top
266  end if
267 
268  ! print section timing
269  nr_padding = this%max_title_len - len_trim(section%title) + 2
270  title_padded = trim(section%title)//":"//repeat(' ', nr_padding)
271  write (this%iout, '(3a,f6.2,2a,f14.6,2a,i0,a,a)') " ", &
272  repeat('....', level), "[", percent, "%] ", title_padded, &
273  section%walltime, "s", " (", section%count, "x)", trim(top_marker)
274 
275  ! print children
276  new_level = level + 1
277  do i = 1, section%children%size
278  call this%print_section(section%children%at(i), new_level)
279  end do
280 
281  if (level == 0) write (this%iout, *)
282 
283  end subroutine print_section
284 
285  subroutine print_total(this, subtitle)
286  class(profilertype) :: this
287  character(len=*) :: subtitle
288  ! local
289  integer(I4B) :: count
290  real(DP) :: walltime, percent
291  integer(I4B) :: nr_padding
292  character(len=:), allocatable :: title_padded
293 
294  ! get maximum length of title
295  nr_padding = this%max_title_len - len_trim(subtitle)
296  title_padded = trim(subtitle)//repeat(' ', nr_padding)
297 
298  count = this%aggregate_counts(subtitle)
299  if (count > 0) then
300  walltime = aggregate_walltime(this, subtitle)
301  percent = (walltime / this%all_sections(this%root_id)%walltime) * 100.0_dp
302  write (this%iout, '(2a,f6.2,3a,f14.6,2a,i0,a)') " ", "[", percent, &
303  "%] ", title_padded, ": ", walltime, "s", " (", count, "x)"
304  end if
305 
306  end subroutine print_total
307 
308  !> @brief Aggregate walltime over sections with a certain title
309  !<
310  function aggregate_walltime(this, title) result(walltime)
311  class(profilertype) :: this
312  character(len=*) :: title
313  real(dp) :: walltime
314  ! local
315  integer(I4B) :: i
316 
317  walltime = dzero
318  do i = 1, this%nr_sections
319  if (index(this%all_sections(i)%title, trim(title)) > 0) then
320  walltime = walltime + this%all_sections(i)%walltime
321  end if
322  end do
323 
324  end function aggregate_walltime
325 
326  !> @brief Aggregate counts over sections with a certain title
327  !<
328  function aggregate_counts(this, title) result(counts)
329  class(profilertype) :: this
330  character(len=*) :: title
331  integer(I4B) :: counts
332  ! local
333  integer(I4B) :: i
334 
335  counts = 0
336  do i = 1, this%nr_sections
337  if (index(this%all_sections(i)%title, trim(title)) > 0) then
338  counts = counts + this%all_sections(i)%count
339  end if
340  end do
341 
342  end function aggregate_counts
343 
344  !> @brief Set the profile option from the user input
345  !<
346  subroutine set_print_option(this, profile_option)
347  class(profilertype) :: this
348  character(len=*), intent(in) :: profile_option
349 
350  select case (trim(profile_option))
351  case ("NONE")
352  this%pr_option = 0
353  case ("SUMMARY")
354  this%pr_option = 1
355  case ("DETAIL")
356  this%pr_option = 2
357  case default
358  this%pr_option = 0
359  end select
360 
361  end subroutine set_print_option
362 
363  !> @brief Clean up the CPU timer object
364  !<
365  subroutine destroy(this)
366  class(profilertype) :: this
367  ! local
368  integer(I4B) :: i
369 
370  call this%callstack%destroy()
371 
372  do i = 1, max_nr_timed_sections
373  call this%all_sections(i)%children%destroy()
374  end do
375  deallocate (this%all_sections)
376  nullify (this%all_sections)
377 
378  end subroutine destroy
379 
380  function is_initialized(this) result(initialized)
381  class(profilertype) :: this
382  logical(LGP) :: initialized
383 
384  initialized = associated(this%all_sections)
385 
386  end function is_initialized
387 
388  !> @brief Calculate the largest title length
389  !<
390  function largest_title_length(this) result(max_length)
391  class(profilertype) :: this
392  integer(I4B) :: max_length
393  integer(I4B) :: i
394 
395  max_length = 0
396  do i = 1, this%nr_sections
397  max_length = max(max_length, len_trim(this%all_sections(i)%title))
398  end do
399 
400  end function largest_title_length
401 
402  !> @brief Sort section indexes based on walltime
403  !<
404  subroutine sort_by_walltime(this, idxs)
405  class(profilertype) :: this
406  integer(I4B), dimension(:), allocatable :: idxs !< array with unsorted section idxs
407  integer(I4B) :: i, j, temp
408 
409  ! Simple bubble sort for demonstration purposes
410  do i = 1, size(idxs) - 1
411  do j = 1, size(idxs) - i
412  if (this%all_sections(idxs(j))%walltime < &
413  this%all_sections(idxs(j + 1))%walltime) then
414  temp = idxs(j)
415  idxs(j) = idxs(j + 1)
416  idxs(j + 1) = temp
417  end if
418  end do
419  end do
420 
421  end subroutine sort_by_walltime
422 
423 end module profilermodule
This module contains simulation constants.
Definition: Constants.f90:9
integer(i4b), parameter linelength
maximum length of a standard line
Definition: Constants.f90:45
real(dp), parameter dnodata
real no data constant
Definition: Constants.f90:95
real(dp), parameter dzero
real constant zero
Definition: Constants.f90:65
integer(i4b), parameter lenmempath
maximum length of the memory path
Definition: Constants.f90:27
This module defines variable data types.
Definition: kind.f90:8
type(profilertype), public g_prof
the global timer object (to reduce trivial lines of code)
Definition: Profiler.f90:65
integer(i4b) function aggregate_counts(this, title)
Aggregate counts over sections with a certain title.
Definition: Profiler.f90:329
real(dp) function aggregate_walltime(this, title)
Aggregate walltime over sections with a certain title.
Definition: Profiler.f90:311
subroutine sort_by_walltime(this, idxs)
Sort section indexes based on walltime.
Definition: Profiler.f90:405
integer(i4b), parameter, public len_section_title
Definition: Profiler.f90:11
subroutine set_print_option(this, profile_option)
Set the profile option from the user input.
Definition: Profiler.f90:347
subroutine print(this, output_unit)
Definition: Profiler.f90:191
integer(i4b), parameter max_nr_timed_sections
Definition: Profiler.f90:10
logical(lgp) function is_initialized(this)
Definition: Profiler.f90:381
integer(i4b) function largest_title_length(this)
Calculate the largest title length.
Definition: Profiler.f90:391
subroutine destroy(this)
Clean up the CPU timer object.
Definition: Profiler.f90:366
subroutine initialize(this)
Definition: Profiler.f90:72
integer(i4b) function add_section(this, title, parent_id)
Add a new timed section to the tree, passing the parent id will add it as a child.
Definition: Profiler.f90:108
recursive subroutine print_section(this, section_id, level)
Definition: Profiler.f90:240
subroutine start(this, title, section_id)
Start section timing, add when not exist yet (i.e. when id < 1)
Definition: Profiler.f90:144
subroutine stop(this, section_id)
Definition: Profiler.f90:172
subroutine print_total(this, subtitle)
Definition: Profiler.f90:286
This module contains simulation methods.
Definition: Sim.f90:10
subroutine, public ustop(stopmess, ioutlocal)
Stop the simulation.
Definition: Sim.f90:312
A public type for profiling performance in the application. The ProfilerType is used to measure and r...
Definition: Profiler.f90:27
A derived type representing a stack of integers.
Definition: STLStackInt.f90:14