Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

export PSYCLONE_PHYSICS_FILES = \
bl_lsp \
bl_diags_mod \
bm_tau_kernel_mod \
btq_int \
conv_gr_kernel_mod \
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ def trans(psyir):
for node in psyir.walk(Routine):
loop_replacement_of(node, "j")

# Span a parallel section across the whole routine,
# apart for a few exceptions provided
# Place a parallel do around loops
for loop in psyir.walk(Loop):
loop_ancestor_type = ""
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@
Routine,
Loop,
OMPParallelDirective,
IfBlock,
Reference,
)
from psyclone.transformations import (TransformationError)
from transmute_psytrans.transmute_functions import (
loop_replacement_of,
get_compiler,
first_priv_red_init,
remove_unspanable_nodes,
set_pure_subroutines,
replace_n_threads,
OMP_PARALLEL_REGION_TRANS,
Expand All @@ -53,7 +54,6 @@
"i": {"variable": "i"}}) # For bdy_impl3.F90


# Longer term we will raise some of these into a override import, see Apps#900
# pylint: disable=too-many-locals
# pylint: disable=too-many-statements
# pylint: disable=too-many-branches
Expand Down Expand Up @@ -130,19 +130,62 @@ def trans(psyir):

# Span a parallel section across the whole routine,
# apart for a few exceptions provided
for routine in psyir.walk(Routine):
routine_children = remove_unspanable_nodes(
routine,
timer_routine_names,
loop_type_init
)
# Span the region across filtered down node list
try:
OMP_PARALLEL_REGION_TRANS.apply(
routine_children)
except (TransformationError, IndexError) as err:
logging.warning("OMPParallelTrans failed: %s", err)
for routine in psyir.walk(Routine): #pylint: disable=R1702
# Get the list of children of the routine
routine_children = routine.children
# Default reference for start_node
start_node = None
# Work through the routine's children from the start.
for index, routine_child in enumerate(routine_children):
# When the first loop or if block is found,
# place it as the start reference index.
if isinstance(routine_child, Loop): #pylint: disable=R1723
start_node = index
break
elif isinstance(routine_child, IfBlock):
found_valid_if = True
# Often timing handles are placed inside if blocks,
# check if it is not a known timing call, which should be
# ignored for spanning a parallel section.
for routine_grandchild in routine_child.walk(Reference):
try:
if str(routine_grandchild.name) in timer_routine_names:
found_valid_if = False
except ValueError: # noqa: E722
continue
if found_valid_if:
start_node = index
break
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
found_valid_if = True
# Often timing handles are placed inside if blocks,
# check if it is not a known timing call, which should be
# ignored for spanning a parallel section.
for routine_grandchild in routine_child.walk(Reference):
try:
if str(routine_grandchild.name) in timer_routine_names:
found_valid_if = False
except ValueError: # noqa: E722
continue
if found_valid_if:
start_node = index
break
# Often timing handles are placed inside if blocks,
# check if it is not a known timing call, which should be
# ignored for spanning a parallel section.
for routine_grandchild in routine_child.walk(Reference):
if str(routine_grandchild.name) in timer_routine_names:
# Start node remains None.
start_node = None
break
else:
# Set the start node to the index and keep checking
# grandchildren for calls that invalidate.
# If all are valid, start_node still equals index.
start_node = index
# Now check if we have a satisfactory start node.
if start_node is not None:
break

You can break out of this for loop much quicker by using this setup. I have tested this on bl_diags_mod.F90 and the PSyclone generated output is the same.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My only concern with this, and why I opted for a try / except, is when PSyclone runs into an occurrence where a grandchild node doesn't have a name property, it will just crash.

Wrapping it in a try / except protects us from this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the breaking early on a if not none test I'm pro

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't aware of that. Perhaps adding this in a comment would be a good idea?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries, hopefully this covers it:
4ff21c4

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And for above and below SR advice with try / except preserved:
86496e8

else:
continue
# Default reference for end_node
end_node = None
for index in range((len(routine_children)-1), 0, -1):
# Like above, work backwards through the routine's children.
if isinstance(routine_children[index], Loop): #pylint: disable=R1723
end_node = index + 1
break
elif isinstance(routine_children[index], IfBlock):
found_valid_if = True
for routine_grandchild in \
routine_children[index].walk(Reference):
try:
if str(routine_grandchild.name) in timer_routine_names:
found_valid_if = False
except ValueError: # noqa: E722
continue
if found_valid_if:
end_node = index + 1
break
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, this can be simplified and will be faster than the original. Also tested on bl_diags_mod.F90.

Suggested change
found_valid_if = True
for routine_grandchild in \
routine_children[index].walk(Reference):
try:
if str(routine_grandchild.name) in timer_routine_names:
found_valid_if = False
except ValueError: # noqa: E722
continue
if found_valid_if:
end_node = index + 1
break
for routine_grandchild in \
routine_children[index].walk(Reference):
if str(routine_grandchild.name) in timer_routine_names:
# end_node remains None.
end_node = None
break
else:
# Otherwise, set to correct position and continue
# checking for calls that invalidate this.
end_node = index + 1
# Now check to see if we have a satisfactory end_node
if end_node is not None:
break

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

else:
continue

if start_node and end_node:
try:
OMP_PARALLEL_REGION_TRANS.apply(
routine_children[start_node:end_node])
except (TransformationError, IndexError) as err:
logging.warning("OMPParallelTrans failed: %s", err)
# CCE first private issue with 3.1, to be removed longer term
if get_compiler() == "cce" and first_private_list:
for routine in psyir.walk(Routine):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,7 @@
"ignore_dependencies_for": [
"bl_diag%grad_t_adj"
]
},
"first_private_list": [
"f2_fqw",
"f2_ftl",
"fsc_fqw",
"fsc_ftl",
"non_grad_fqw",
"non_grad_ftl"
]
}
}

SCRIPT_OPTIONS_DICT["ex_flux_uv"+str(FILE_EXTEN)] = {
Expand All @@ -65,56 +57,7 @@
"surf_dep_flux",
"gamma_rhokh_rdz"
]
},
"first_private_list": [
"dfield_inv",
"dz_disc",
"dzlkp1",
"f_field_ent",
"km1"
]
}

SCRIPT_OPTIONS_DICT["bdy_impl3"+str(FILE_EXTEN)] = {
"options": {
"ignore_dependencies_for": [
"dtl1_1",
"ct_prod",
"dqw1_1",
"dqw",
"dtl",
"ct_ctq",
"temp",
"dqw1",
"dtl1",
"ctctq1"
]
},
"safe_pure_calls": [
"oneover_v"
],
"max_threads_parse": True,
"loop_type_init": [
"max_threads",
"blm1",
"tdims_omp_block",
"tdims_seg_block"
]
}

SCRIPT_OPTIONS_DICT["bdy_impl4"+str(FILE_EXTEN)] = {
"options": {
"ignore_dependencies_for": [
"tl",
"qw",
"dtl",
"dqw"
]
},
"loop_type_init": [
"tdims_omp_block",
"tdims_seg_block"
]
}
}

SCRIPT_OPTIONS_DICT["imp_mix"+str(FILE_EXTEN)] = {
Expand All @@ -129,12 +72,6 @@
]
},
"max_threads_parse": True,
"loop_type_init": [
"max_threads",
"blm1",
"pdims_omp_block",
"pdims_seg_block"
]
}

SCRIPT_OPTIONS_DICT["fm_drag"+str(FILE_EXTEN)] = {
Expand All @@ -161,5 +98,3 @@
"wtb"
]
}

# ## Other boundary layer options ##
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

export PSYCLONE_PHYSICS_FILES = \
bl_lsp \
bl_diags_mod \
bm_tau_kernel_mod \
btq_int \
conv_gr_kernel_mod \
Expand Down
Loading
Loading