/*
Derived from the DARPA/Livermore Unstructured Lagrangian Explicit
Shock Hydrodynamics (LULESH)
https://computation.llnl.gov/casc/ShockHydro/

Original port to Chapel by Brandon Holt (8/2011). Further
improvements for the sake of performance and/or generality made by
Sung-Eun Choi (12/2011, 11/2012), Jeff Keasler (3/2012), and Brad
Chamberlain (3-4,9-11/2012, 2/2013).

Notes on the Initial Implementation
-----------------------------------

This implementation was designed to mirror the overall structure of
the C++ Lulesh but use Chapel constructs where they can help make
the code more readable, easier to maintain, or more
'elegant'. Function names are preserved for the most part, with some
additional helper functions, and original comments from the C++ code
are interspersed approximately where they belong to give an idea of
how the two codes line up. One major difference for this Chapel
version is the use of a number of module-level variables and
constants.

Status:

This code remains a work-in-progress as we gain further experience
with it. Proposed improvements are noted in the README in this
directory and (in some cases) in TODO comments in the code.
*/
use Time, // to get timing routines for benchmarking
BlockDist; // for block-distributed arrays
use luleshInit; // initialization code for data set
/* The configuration parameters for lulesh. These can be set on the
compiler command line using -s<paramName>=<value>. For example,
chpl -suseBlockDist=true

useBlockDist : says whether or not to block-distribute the arrays.
The default is based on the value of CHPL_COMM, as
an indication of whether this is a single- or multi-
locale execution.

use3DRepresentation : indicates whether the element node arrays
should be stored using a 3D representation
(limiting the execution to cube inputs) or
the more general 1D representation (supporting
arbitrary data sets).

useSparseMaterials : indicates whether sparse domains/arrays should be
used to represent the materials. Sparse domains
are more realistic in that they permit an arbitrary
subset of the problem space to store a material.
Dense domains are sufficient for LULESH since there's
an assumption that the material spans all cells.

printWarnings : prints performance-oriented warnings to prevent
surprises.
*/
config param useBlockDist = (CHPL_COMM != "none"),
use3DRepresentation = false,
useSparseMaterials = true,
printWarnings = true;
//
// Sanity check to ensure that input files aren't used with the 3D
// representation
//
if (use3DRepresentation && (luleshInit.filename != "")) then
halt("The 3D representation does not support reading input from files");
/* Configuration constants: Override defaults on executable's command-line */
config const initialEnergy = 3.948746e+7; // initial energy value
config const showProgress = false, // print time and dt values on each step
debug = false, // print various debug info
doTiming = true, // time the main timestep loop
printCoords = true; // print the final computed coordinates
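//
// Config constants can be overridden on the generated executable's command
// line, for example (illustrative, assuming the binary is named lulesh):
//   ./lulesh --showProgress=true --doTiming=false
//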
/* Compile-time constants */
param XI_M = 0x003,
XI_M_SYMM = 0x001,
XI_M_FREE = 0x002,
XI_P = 0x00c,
XI_P_SYMM = 0x004,
XI_P_FREE = 0x008,
ETA_M = 0x030,
ETA_M_SYMM = 0x010,
ETA_M_FREE = 0x020,
ETA_P = 0x0c0,
ETA_P_SYMM = 0x040,
ETA_P_FREE = 0x080,
ZETA_M = 0x300,
ZETA_M_SYMM = 0x100,
ZETA_M_FREE = 0x200,
ZETA_P = 0xc00,
ZETA_P_SYMM = 0x400,
ZETA_P_FREE = 0x800;
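//
// Boundary-condition bit layout for elemBC: each element face (xi/eta/zeta,
// minus/plus) gets two bits, one marking a symmetry plane (*_SYMM) and one
// marking a free surface (*_FREE); the *_M/*_P constants are the two-bit
// masks covering both flags for a face (e.g., XI_M == XI_M_SYMM | XI_M_FREE).
//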
/* Set up the problem size */
const (numElems, numNodes) = initProblemSize();
/* Declare abstract problem domains */
const ElemSpace = if use3DRepresentation
then {0..#elemsPerEdge, 0..#elemsPerEdge, 0..#elemsPerEdge}
else {0..#numElems},
NodeSpace = if use3DRepresentation
then {0..#nodesPerEdge, 0..#nodesPerEdge, 0..#nodesPerEdge}
else {0..#numNodes};
/* Declare the (potentially distributed) problem domains */
const Elems = if useBlockDist then ElemSpace dmapped Block(ElemSpace)
else ElemSpace,
Nodes = if useBlockDist then NodeSpace dmapped Block(NodeSpace)
else NodeSpace;
/* The coordinates */
var x, y, z: [Nodes] real;
/* The number of nodes per element. In a rank-independent version,
this could be written 2**rank */
param nodesPerElem = 8;
// We could name this, but chose not to since it doesn't add that much clarity
//
// const elemNeighbors = 1..nodesPerElem;
/* The element-to-node mapping */
var elemToNode: [Elems] nodesPerElem*index(Nodes);
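// (each element stores a tuple of its eight node indices)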
/* the Greek variables */
var lxim, lxip, letam, letap, lzetam, lzetap: [Elems] index(Elems);
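// (per-element neighbor indices across the xi/eta/zeta faces; 'm' is the
//  minus face, 'p' the plus face, as used by the monotonic q limiter)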
/* the X, Y, Z Symmetry values */
var XSym, YSym, ZSym: sparse subdomain(Nodes);
/* Constants */
const u_cut = 1.0e-7, /* velocity tolerance */
hgcoef = 3.0, /* hourglass control */
qstop = 1.0e+12, /* excessive q indicator */
monoq_max_slope = 1.0,
monoq_limiter_mult = 2.0,
e_cut = 1.0e-7, /* energy tolerance */
p_cut = 1.0e-7, /* pressure tolerance */
ss4o3 = 4.0/3.0,
q_cut = 1.0e-7, /* q tolerance */
v_cut = 1.0e-10, /* relative volume tolerance */
qlc_monoq = 0.5, /* linear term coef for q */
qqc_monoq = 2.0/3.0, /* quadratic term coef for q */
qqc = 2.0,
qqc2 = 64.0 * qqc**2,
eosvmax = 1.0e+9,
eosvmin = 1.0e-9,
pmin = 0.0, /* pressure floor */
emin = -1.0e+15, /* energy floor */
dvovmax = 0.1, /* maximum allowable volume change */
refdens = 1.0, /* reference density */
deltatimemultlb = 1.1,
deltatimemultub = 1.2,
dtmax = 1.0e-2; /* maximum allowable time increment */
config const stoptime = 1.0e-2, /* end time for simulation */
maxcycles = max(int), /* max number of cycles to simulate */
dtfixed = -1.0e-7; /* fixed time increment */
/* The list of material elements */
const MatElems: MatElemsType = if useSparseMaterials then enumerateMatElems()
else Elems;
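// Type function used above: selects the representation of MatElems at
// compile time -- a sparse subdomain of Elems when useSparseMaterials is
// true, otherwise the dense Elems domain itself.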
proc MatElemsType type {
if useSparseMaterials {
if (printWarnings && useBlockDist && numLocales > 1) then
writeln("WARNING: The LULESH Material Elements (MatElems) are not yet\n",
" distributed, so result in excessive memory use on,\n",
" and communication with, locale 0\n");
return sparse subdomain(Elems);
} else
return Elems.type;
}
iter enumerateMatElems() {
if (printWarnings && useBlockDist && numLocales > 1) then
writeln("WARNING: generation of matrix elements is serial and\n",
" unlikely to scale");
for i in Elems do
yield i;
}
/* Element fields */
var elemBC: [Elems] int,
e: [Elems] real, // energy
p: [Elems] real, // pressure
q: [Elems] real, // q
ql: [Elems] real, // linear term for q
qq: [Elems] real, // quadratic term for q
v: [Elems] real = 1.0, //relative volume
vnew: [Elems] real,
volo: [Elems] real, // reference volume
delv: [Elems] real, // m_vnew - m_v
vdov: [Elems] real, // volume derivative over volume
arealg: [Elems] real, // elem characteristic length
ss: [Elems] real, // "sound speed"
elemMass: [Elems] real; // mass
/* Nodal fields */
var xd, yd, zd: [Nodes] real, // velocities
xdd, ydd, zdd: [Nodes] real, // acceleration
fx, fy, fz: [Nodes] atomic real, // forces
nodalMass: [Nodes] real; // mass
/* Parameters */
var time = 0.0, // current time
deltatime = 1.0e-7, // variable time increment
dtcourant = 1.0e20, // courant constraint
dthydro = 1.0e20, // volume change constraint
cycle = 0; // iteration count for simulation
proc main() {
if debug then writeln("Lulesh -- Problem Size = ", numElems);
initLulesh();
var st: real;
if doTiming then st = getCurrentTime();
while (time < stoptime && cycle < maxcycles) {
const iterTime = if showProgress then getCurrentTime() else 0.0;
TimeIncrement();
LagrangeLeapFrog();
if debug {
deprintatomic("[[ Forces ]]", fx, fy, fz);
deprint("[[ Positions ]]", x, y, z);
deprint("[[ p, e, q ]]", p, e, q);
}
if showProgress then
writef("time = %er, dt=%er, %s", time, deltatime,
if doTiming then ", elapsed = " + (getCurrentTime()-iterTime) +"\n"
else "\n");
}
if (cycle == maxcycles) {
writeln("Stopped early due to reaching maxnumsteps");
}
if doTiming {
const et = getCurrentTime();
writeln("Total Time: ", et-st);
writeln("Time/Cycle: ", (et-st)/cycle);
}
writeln("Number of cycles: ", cycle);
if printCoords {
var outfile = open("coords.out", iomode.cw);
var writer = outfile.writer();
var fmtstr = if debug then "%1.9er %1.9er %1.9er\n"
else "%1.4er %1.4er %1.4er\n";
for i in Nodes do
writer.writef(fmtstr, x[i], y[i], z[i]);
writer.close();
outfile.close();
}
}
/* Initialization functions */
proc initLulesh() {
// initialize the coordinates
initCoordinates(x,y,z);
// initialize the element to node mapping
initElemToNodeMapping(elemToNode);
// initialize the greek symbols
initGreekVars(lxim, lxip, letam, letap, lzetam, lzetap);
// initialize the symmetry plane locations
initXSyms(XSym);
initYSyms(YSym);
initZSyms(ZSym);
/* embed hexahedral elements in nodal point lattice */
//calculated on the fly using: elemToNodes(i: index(Elems)): index(Nodes)
// initialize the masses
initMasses();
// initialize the boundary conditions
const octantCorner = initBoundaryConditions();
// deposit the energy for Sedov Problem
e[octantCorner] = initialEnergy;
}
proc initMasses() {
// This is a temporary array of 'atomic' variables, used to accumulate
// the masses in parallel without losing updates
var massAccum: [Nodes] atomic real;
forall eli in Elems {
var x_local, y_local, z_local: 8*real;
localizeNeighborNodes(eli, x, x_local, y, y_local, z, z_local);
const volume = CalcElemVolume(x_local, y_local, z_local);
volo[eli] = volume;
elemMass[eli] = volume;
for neighbor in elemToNodes[eli] do
massAccum[neighbor].add(volume);
}
// When we're done, copy the accumulated masses into nodalMass, at
// which point the massAccum array can go away (and will at the
// procedure's return)
forall i in Nodes do
nodalMass[i] = massAccum[i].read() / 8.0;
if debug {
writeln("ElemMass:");
for mass in elemMass do writeln(mass);
writeln("NodalMass:");
for mass in nodalMass do writeln(mass);
}
}
proc initBoundaryConditions() {
var surfaceNode: [Nodes] int;
forall n in XSym do
surfaceNode[n] = 1;
forall n in YSym do
surfaceNode[n] = 1;
forall n in ZSym do
surfaceNode[n] = 1;
forall e in Elems do {
var mask: int;
for i in 1..nodesPerElem do
mask += surfaceNode[elemToNode[e][i]] << (i-1);
// TODO: make an inlined function for this little idiom? (and below)
if ((mask & 0x0f) == 0x0f) then elemBC[e] |= ZETA_M_SYMM;
if ((mask & 0xf0) == 0xf0) then elemBC[e] |= ZETA_P_SYMM;
if ((mask & 0x33) == 0x33) then elemBC[e] |= ETA_M_SYMM;
if ((mask & 0xcc) == 0xcc) then elemBC[e] |= ETA_P_SYMM;
if ((mask & 0x99) == 0x99) then elemBC[e] |= XI_M_SYMM;
if ((mask & 0x66) == 0x66) then elemBC[e] |= XI_P_SYMM;
}
//
// We find the octant corner by looking for the element with
// all three SYMM flags set, which will have the largest
// integral value. Thus, we can use a maxloc to identify it.
//
const (check, loc) = maxloc reduce zip(elemBC, Elems);
if debug then writeln("Found the octant corner at: ", loc);
if (check != (XI_M_SYMM | ETA_M_SYMM | ZETA_M_SYMM)) then
halt("maxloc got a value of ", check, " at loc ", loc);
// TODO: This is an example of an array that, in a distributed
// memory code, would typically be completely local and only storing
// the local nodes owned by the locale -- noting that some nodes
// are logically owned by multiple locales and therefore would
// redundantly be stored in both locales' surfaceNode arrays -- it's
// essentially local scratchspace that does not need to be communicated
// or kept coherent across locales.
//
surfaceNode = 0;
/* the free surfaces */
var freeSurface: sparse subdomain(Nodes);
// initialize the free surface
initFreeSurface(freeSurface);
forall n in freeSurface do
surfaceNode[n] = 1;
forall e in Elems do {
var mask: int;
for i in 1..nodesPerElem do
mask += surfaceNode[elemToNode[e][i]] << (i-1);
if ((mask & 0x0f) == 0x0f) then elemBC[e] |= ZETA_M_FREE;
if ((mask & 0xf0) == 0xf0) then elemBC[e] |= ZETA_P_FREE;
if ((mask & 0x33) == 0x33) then elemBC[e] |= ETA_M_FREE;
if ((mask & 0xcc) == 0xcc) then elemBC[e] |= ETA_P_FREE;
if ((mask & 0x99) == 0x99) then elemBC[e] |= XI_M_FREE;
if ((mask & 0x66) == 0x66) then elemBC[e] |= XI_P_FREE;
}
if debug {
writeln("elemBC:");
for b in elemBC do writeln(b);
}
return loc;
}
/* Helper functions */
inline proc localizeNeighborNodes(eli: index(Elems),
x: [] real, ref x_local: 8*real,
y: [] real, ref y_local: 8*real,
z: [] real, ref z_local: 8*real) {
for i in 1..nodesPerElem {
const noi = elemToNode[eli][i];
x_local[i] = x[noi];
y_local[i] = y[noi];
z_local[i] = z[noi];
}
}
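// Scalar triple product of the vectors (x1,y1,z1), (x2,y2,z2), (x3,y3,z3),
// i.e., the determinant of the 3x3 matrix they form.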
inline proc TripleProduct(x1, y1, z1, x2, y2, z2, x3, y3, z3) {
return x1*(y2*z3 - z2*y3) + x2*(z1*y3 - y1*z3) + x3*(y1*z2 - z1*y2);
}
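// Volume of a hexahedral element given its eight node coordinates, computed
// as a sum of three triple products divided by 12.  The numeric suffixes in
// the difference names (e.g., dx61) follow the original C++ 0-based node
// numbering, while the tuples here are indexed 1..8.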
proc CalcElemVolume(x, y, z) {
const dx61 = x[7] - x[2],
dy61 = y[7] - y[2],
dz61 = z[7] - z[2],
dx70 = x[8] - x[1],
dy70 = y[8] - y[1],
dz70 = z[8] - z[1],
dx63 = x[7] - x[4],
dy63 = y[7] - y[4],
dz63 = z[7] - z[4],
dx20 = x[3] - x[1],
dy20 = y[3] - y[1],
dz20 = z[3] - z[1],
dx50 = x[6] - x[1],
dy50 = y[6] - y[1],
dz50 = z[6] - z[1],
dx64 = x[7] - x[5],
dy64 = y[7] - y[5],
dz64 = z[7] - z[5],
dx31 = x[4] - x[2],
dy31 = y[4] - y[2],
dz31 = z[4] - z[2],
dx72 = x[8] - x[3],
dy72 = y[8] - y[3],
dz72 = z[8] - z[3],
dx43 = x[5] - x[4],
dy43 = y[5] - y[4],
dz43 = z[5] - z[4],
dx57 = x[6] - x[8],
dy57 = y[6] - y[8],
dz57 = z[6] - z[8],
dx14 = x[2] - x[5],
dy14 = y[2] - y[5],
dz14 = z[2] - z[5],
dx25 = x[3] - x[6],
dy25 = y[3] - y[6],
dz25 = z[3] - z[6];
const volume = TripleProduct(dx31 + dx72, dx63, dx20,
dy31 + dy72, dy63, dy20,
dz31 + dz72, dz63, dz20) +
TripleProduct(dx43 + dx57, dx64, dx70,
dy43 + dy57, dy64, dy70,
dz43 + dz57, dz64, dz70) +
TripleProduct(dx14 + dx25, dx61, dx50,
dy14 + dy25, dy61, dy50,
dz14 + dz25, dz61, dz50);
return volume / 12.0;
}
proc InitStressTermsForElems(p, q, sigxx, sigyy, sigzz: [?D] real) {
forall i in D {
sigxx[i] = -p[i] - q[i];
sigyy[i] = -p[i] - q[i];
sigzz[i] = -p[i] - q[i];
}
}
proc CalcElemShapeFunctionDerivatives(x: 8*real, y: 8*real, z: 8*real,
ref b_x: 8*real,
ref b_y: 8*real,
ref b_z: 8*real,
ref volume: real) {
const fjxxi = .125 * ((x[7]-x[1]) + (x[6]-x[4]) - (x[8]-x[2]) - (x[5]-x[3])),
fjxet = .125 * ((x[7]-x[1]) - (x[6]-x[4]) + (x[8]-x[2]) - (x[5]-x[3])),
fjxze = .125 * ((x[7]-x[1]) + (x[6]-x[4]) + (x[8]-x[2]) + (x[5]-x[3])),
fjyxi = .125 * ((y[7]-y[1]) + (y[6]-y[4]) - (y[8]-y[2]) - (y[5]-y[3])),
fjyet = .125 * ((y[7]-y[1]) - (y[6]-y[4]) + (y[8]-y[2]) - (y[5]-y[3])),
fjyze = .125 * ((y[7]-y[1]) + (y[6]-y[4]) + (y[8]-y[2]) + (y[5]-y[3])),
fjzxi = .125 * ((z[7]-z[1]) + (z[6]-z[4]) - (z[8]-z[2]) - (z[5]-z[3])),
fjzet = .125 * ((z[7]-z[1]) - (z[6]-z[4]) + (z[8]-z[2]) - (z[5]-z[3])),
fjzze = .125 * ((z[7]-z[1]) + (z[6]-z[4]) + (z[8]-z[2]) + (z[5]-z[3]));
/* compute cofactors */
const cjxxi = (fjyet * fjzze) - (fjzet * fjyze),
cjxet = - (fjyxi * fjzze) + (fjzxi * fjyze),
cjxze = (fjyxi * fjzet) - (fjzxi * fjyet),
cjyxi = - (fjxet * fjzze) + (fjzet * fjxze),
cjyet = (fjxxi * fjzze) - (fjzxi * fjxze),
cjyze = - (fjxxi * fjzet) + (fjzxi * fjxet),
cjzxi = (fjxet * fjyze) - (fjyet * fjxze),
cjzet = - (fjxxi * fjyze) + (fjyxi * fjxze),
cjzze = (fjxxi * fjyet) - (fjyxi * fjxet);
/* calculate partials:
this need only be done for l = 0,1,2,3 since, by symmetry,
(6,7,4,5) = - (0,1,2,3).
*/
b_x[1] = - cjxxi - cjxet - cjxze;
b_x[2] = cjxxi - cjxet - cjxze;
b_x[3] = cjxxi + cjxet - cjxze;
b_x[4] = - cjxxi + cjxet - cjxze;
b_x[5] = -b_x[3];
b_x[6] = -b_x[4];
b_x[7] = -b_x[1];
b_x[8] = -b_x[2];
b_y[1] = - cjyxi - cjyet - cjyze;
b_y[2] = cjyxi - cjyet - cjyze;
b_y[3] = cjyxi + cjyet - cjyze;
b_y[4] = - cjyxi + cjyet - cjyze;
b_y[5] = -b_y[3];
b_y[6] = -b_y[4];
b_y[7] = -b_y[1];
b_y[8] = -b_y[2];
b_z[1] = - cjzxi - cjzet - cjzze;
b_z[2] = cjzxi - cjzet - cjzze;
b_z[3] = cjzxi + cjzet - cjzze;
b_z[4] = - cjzxi + cjzet - cjzze;
b_z[5] = -b_z[3];
b_z[6] = -b_z[4];
b_z[7] = -b_z[1];
b_z[8] = -b_z[2];
/* calculate jacobian determinant (volume) */
volume = 8.0 * ( fjxet * cjxet + fjyet * cjyet + fjzet * cjzet);
}
proc CalcElemNodeNormals(ref pfx: 8*real, ref pfy: 8*real, ref pfz: 8*real,
x: 8*real, y: 8*real, z: 8*real) {
proc ElemFaceNormal(param n1, param n2, param n3, param n4) {
const bisectX0 = 0.5 * (x[n4] + x[n3] - x[n2] - x[n1]),
bisectY0 = 0.5 * (y[n4] + y[n3] - y[n2] - y[n1]),
bisectZ0 = 0.5 * (z[n4] + z[n3] - z[n2] - z[n1]),
bisectX1 = 0.5 * (x[n3] + x[n2] - x[n4] - x[n1]),
bisectY1 = 0.5 * (y[n3] + y[n2] - y[n4] - y[n1]),
bisectZ1 = 0.5 * (z[n3] + z[n2] - z[n4] - z[n1]),
areaX = 0.25 * (bisectY0 * bisectZ1 - bisectZ0 * bisectY1),
areaY = 0.25 * (bisectZ0 * bisectX1 - bisectX0 * bisectZ1),
areaZ = 0.25 * (bisectX0 * bisectY1 - bisectY0 * bisectX1);
var rx, ry, rz: 8*real; //results
(rx[n1], rx[n2], rx[n3], rx[n4]) = (areaX, areaX, areaX, areaX);
(ry[n1], ry[n2], ry[n3], ry[n4]) = (areaY, areaY, areaY, areaY);
(rz[n1], rz[n2], rz[n3], rz[n4]) = (areaZ, areaZ, areaZ, areaZ);
return (rx, ry, rz);
}
// calculate total normal from each face (faces are made up of
// combinations of nodes)
(pfx, pfy, pfz) = ElemFaceNormal(1,2,3,4) + ElemFaceNormal(1,5,6,2) +
ElemFaceNormal(2,6,7,3) + ElemFaceNormal(3,7,8,4) +
ElemFaceNormal(4,8,5,1) + ElemFaceNormal(5,8,7,6);
}
proc SumElemStressesToNodeForces(b_x: 8*real, b_y: 8*real, b_z: 8*real,
stress_xx:real,
stress_yy:real,
stress_zz: real,
ref fx: 8*real,
ref fy: 8*real,
ref fz: 8*real) {
for i in 1..8 {
fx[i] = -(stress_xx * b_x[i]);
fy[i] = -(stress_yy * b_y[i]);
fz[i] = -(stress_zz * b_z[i]);
}
}
proc CalcElemVolumeDerivative(x: 8*real, y: 8*real, z: 8*real) {
proc VoluDer(param n0, param n1, param n2, param n3, param n4, param n5) {
const ox = (y[n1] + y[n2]) * (z[n0] + z[n1])
- (y[n0] + y[n1]) * (z[n1] + z[n2])
+ (y[n0] + y[n4]) * (z[n3] + z[n4])
- (y[n3] + y[n4]) * (z[n0] + z[n4])
- (y[n2] + y[n5]) * (z[n3] + z[n5])
+ (y[n3] + y[n5]) * (z[n2] + z[n5]),
oy = - (x[n1] + x[n2]) * (z[n0] + z[n1])
+ (x[n0] + x[n1]) * (z[n1] + z[n2])
- (x[n0] + x[n4]) * (z[n3] + z[n4])
+ (x[n3] + x[n4]) * (z[n0] + z[n4])
+ (x[n2] + x[n5]) * (z[n3] + z[n5])
- (x[n3] + x[n5]) * (z[n2] + z[n5]),
oz = - (y[n1] + y[n2]) * (x[n0] + x[n1])
+ (y[n0] + y[n1]) * (x[n1] + x[n2])
- (y[n0] + y[n4]) * (x[n3] + x[n4])
+ (y[n3] + y[n4]) * (x[n0] + x[n4])
+ (y[n2] + y[n5]) * (x[n3] + x[n5])
- (y[n3] + y[n5]) * (x[n2] + x[n5]);
return (ox/12.0, oy/12.0, oz/12.0);
}
var dvdx, dvdy, dvdz: 8*real;
(dvdx[1], dvdy[1], dvdz[1]) = VoluDer(2,3,4,5,6,8);
(dvdx[4], dvdy[4], dvdz[4]) = VoluDer(1,2,3,8,5,7);
(dvdx[3], dvdy[3], dvdz[3]) = VoluDer(4,1,2,7,8,6);
(dvdx[2], dvdy[2], dvdz[2]) = VoluDer(3,4,1,6,7,5);
(dvdx[5], dvdy[5], dvdz[5]) = VoluDer(8,7,6,1,4,2);
(dvdx[6], dvdy[6], dvdz[6]) = VoluDer(5,8,7,2,1,3);
(dvdx[7], dvdy[7], dvdz[7]) = VoluDer(6,5,8,3,2,4);
(dvdx[8], dvdy[8], dvdz[8]) = VoluDer(7,6,5,4,3,1);
return (dvdx, dvdy, dvdz);
}
inline proc CalcElemFBHourglassForce(xd: 8*real, yd: 8*real, zd: 8*real,
hourgam: 8*(4*real),
coefficient: real,
ref hgfx: 8*real,
ref hgfy: 8*real,
ref hgfz: 8*real) {
var hx, hy, hz: 4*real;
// reduction
for i in 1..4 {
for j in 1..8 {
hx[i] += hourgam[j][i] * xd[j];
hy[i] += hourgam[j][i] * yd[j];
hz[i] += hourgam[j][i] * zd[j];
}
}
for i in 1..8 {
var shx, shy, shz: real;
for j in 1..4 {
shx += hourgam[i][j] * hx[j];
shy += hourgam[i][j] * hy[j];
shz += hourgam[i][j] * hz[j];
}
hgfx[i] = coefficient * shx;
hgfy[i] = coefficient * shy;
hgfz[i] = coefficient * shz;
}
}
proc CalcElemCharacteristicLength(x, y, z, volume) {
proc AreaFace(param p0, param p1, param p2, param p3) {
const fx = (x[p2] - x[p0]) - (x[p3] - x[p1]),
fy = (y[p2] - y[p0]) - (y[p3] - y[p1]),
fz = (z[p2] - z[p0]) - (z[p3] - z[p1]),
gx = (x[p2] - x[p0]) + (x[p3] - x[p1]),
gy = (y[p2] - y[p0]) + (y[p3] - y[p1]),
gz = (z[p2] - z[p0]) + (z[p3] - z[p1]),
area = (fx * fx + fy * fy + fz * fz) *
(gx * gx + gy * gy + gz * gz) -
(fx * gx + fy * gy + fz * gz) *
(fx * gx + fy * gy + fz * gz);
return area ;
}
const charLength = max(AreaFace(1, 2, 3, 4),
AreaFace(5, 6, 7, 8),
AreaFace(1, 2, 6, 5),
AreaFace(2, 3, 7, 6),
AreaFace(3, 4, 8, 7),
AreaFace(4, 1, 5, 8));
return 4.0 * volume / sqrt(charLength);
}
proc CalcElemVelocityGradient(xvel, yvel, zvel, pfx, pfy, pfz,
detJ, ref d: 6*real) {
const inv_detJ = 1.0 / detJ;
d[1] = inv_detJ * ( pfx[1] * (xvel[1]-xvel[7])
+ pfx[2] * (xvel[2]-xvel[8])
+ pfx[3] * (xvel[3]-xvel[5])
+ pfx[4] * (xvel[4]-xvel[6]) );
d[2] = inv_detJ * ( pfy[1] * (yvel[1]-yvel[7])
+ pfy[2] * (yvel[2]-yvel[8])
+ pfy[3] * (yvel[3]-yvel[5])
+ pfy[4] * (yvel[4]-yvel[6]) );
d[3] = inv_detJ * ( pfz[1] * (zvel[1]-zvel[7])
+ pfz[2] * (zvel[2]-zvel[8])
+ pfz[3] * (zvel[3]-zvel[5])
+ pfz[4] * (zvel[4]-zvel[6]) );
const dyddx = inv_detJ * ( pfx[1] * (yvel[1]-yvel[7])
+ pfx[2] * (yvel[2]-yvel[8])
+ pfx[3] * (yvel[3]-yvel[5])
+ pfx[4] * (yvel[4]-yvel[6]) ),
dxddy = inv_detJ * ( pfy[1] * (xvel[1]-xvel[7])
+ pfy[2] * (xvel[2]-xvel[8])
+ pfy[3] * (xvel[3]-xvel[5])
+ pfy[4] * (xvel[4]-xvel[6]) ),
dzddx = inv_detJ * ( pfx[1] * (zvel[1]-zvel[7])
+ pfx[2] * (zvel[2]-zvel[8])
+ pfx[3] * (zvel[3]-zvel[5])
+ pfx[4] * (zvel[4]-zvel[6]) ),
dxddz = inv_detJ * ( pfz[1] * (xvel[1]-xvel[7])
+ pfz[2] * (xvel[2]-xvel[8])
+ pfz[3] * (xvel[3]-xvel[5])
+ pfz[4] * (xvel[4]-xvel[6]) ),
dzddy = inv_detJ * ( pfy[1] * (zvel[1]-zvel[7])
+ pfy[2] * (zvel[2]-zvel[8])
+ pfy[3] * (zvel[3]-zvel[5])
+ pfy[4] * (zvel[4]-zvel[6]) ),
dyddz = inv_detJ * ( pfz[1] * (yvel[1]-yvel[7])
+ pfz[2] * (yvel[2]-yvel[8])
+ pfz[3] * (yvel[3]-yvel[5])
+ pfz[4] * (yvel[4]-yvel[6]) );
d[6] = 0.5 * ( dxddy + dyddx );
d[5] = 0.5 * ( dxddz + dzddx );
d[4] = 0.5 * ( dzddy + dyddz );
}
proc CalcPressureForElems(p_new: [?D] real, bvc, pbvc,
e_old, compression, vnewc,
pmin: real, p_cut: real, eosvmax: real) {
//
// TODO: Uncomment local once sparse domain is distributed
//
forall i in D /* do local */ {
const c1s = 2.0 / 3.0;
bvc[i] = c1s * (compression[i] + 1.0);
pbvc[i] = c1s;
}
forall i in D {
p_new[i] = bvc[i] * e_old[i];
if abs(p_new[i]) < p_cut then p_new[i] = 0.0;
if vnewc[i] >= eosvmax then p_new[i] = 0.0; //impossible?
if p_new[i] < pmin then p_new[i] = pmin;
}
}
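// Advance the simulation clock: deltatime is limited by the Courant
// (dtcourant) and hydro (dthydro) constraints computed during the previous
// cycle, may only grow/shrink within the [deltatimemultlb, deltatimemultub]
// window, and is capped by dtmax and by the time remaining until stoptime.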
proc TimeIncrement() {
var targetdt = stoptime - time;
if dtfixed <= 0.0 && cycle != 0 { //don't do this the first cycle
var olddt = deltatime,
newdt = 1.0e20;
if dtcourant < newdt then newdt = dtcourant / 2.0;
if dthydro < newdt then newdt = 2.0/3.0 * dthydro;
const ratio = newdt / olddt;
if ratio >= 1.0 {
if ratio < deltatimemultlb then newdt = olddt;
else if ratio > deltatimemultub then newdt = olddt * deltatimemultub;
}
if newdt > dtmax then newdt = dtmax;
deltatime = newdt;
}
/* TRY TO PREVENT VERY SMALL SCALING ON THE NEXT CYCLE */
if targetdt > deltatime && targetdt < (4.0/3.0 * deltatime) {
targetdt = 2.0/3.0 * deltatime;
}
if targetdt < deltatime then deltatime = targetdt;
time += deltatime;
cycle += 1;
}
inline proc LagrangeLeapFrog() {
/* calculate nodal forces, accelerations, velocities, positions, with
* applied boundary conditions and slide surface considerations */
LagrangeNodal();
/* calculate element quantities (i.e. velocity gradient & q), and update
* material states */
LagrangeElements();
CalcTimeConstraintsForElems();
}
inline proc LagrangeNodal() {
CalcForceForNodes();
CalcAccelerationForNodes();
ApplyAccelerationBoundaryConditionsForNodes();
CalcVelocityForNodes(deltatime, u_cut);
CalcPositionForNodes(deltatime);
}
inline proc LagrangeElements() {
CalcLagrangeElements();
/* Calculate Q. (Monotonic q option requires communication) */
CalcQForElems();
ApplyMaterialPropertiesForElems();
UpdateVolumesForElems();
}
inline proc CalcTimeConstraintsForElems() {
/* evaluate time constraint */
CalcCourantConstraintForElems();
/* check hydro constraint */
CalcHydroConstraintForElems();
}
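// Per-element Courant timestep estimate: the element's characteristic length
// divided by an effective sound speed (augmented by the qqc2 term when the
// element is compressing, i.e., vdov < 0).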
inline proc computeDTF(indx) {
const myvdov = vdov[indx];
if myvdov == 0.0 then
return max(real);
const myarealg = arealg[indx];
var dtf = ss[indx]**2;
if myvdov < 0.0 then
dtf += qqc2 * myarealg**2 * myvdov**2;
dtf = sqrt(dtf);
dtf = myarealg / dtf;
return dtf;
}
proc CalcCourantConstraintForElems() {
const val = min reduce [indx in MatElems] computeDTF(indx);
if (val != max(real)) then
dtcourant = val;
}
inline proc calcDtHydroTmp(indx) {
const myvdov = vdov[indx];
if (myvdov == 0.0) then
return max(real);
else
return dvovmax / (abs(myvdov)+1.0e-20);
}
proc CalcHydroConstraintForElems() {
const val = min reduce [indx in MatElems] calcDtHydroTmp(indx);
if (val != max(real)) then
dthydro = val;
}
/* calculate nodal forces, accelerations, velocities, positions, with
* applied boundary conditions and slide surface considerations */
proc CalcForceForNodes() {
//zero out all forces
forall x in fx do x.write(0);
forall y in fy do y.write(0);
forall z in fz do z.write(0);
/* Calcforce calls partial, force, hourq */
CalcVolumeForceForElems();
/* Calculate Nodal Forces at domain boundaries */
// this was commented out in C++ code, so left out here
}
proc CalcVolumeForceForElems() {
var sigxx, sigyy, sigzz, determ: [Elems] real;
/* Sum contributions to total stress tensor */
InitStressTermsForElems(p, q, sigxx, sigyy, sigzz);
/* call elemlib stress integration loop to produce nodal forces from
material stresses. */
IntegrateStressForElems(sigxx, sigyy, sigzz, determ);
/* check for negative element volume */
forall e in Elems {
if determ[e] <= 0.0 then
halt("can't have negative volume (determ[", e, "]=", determ[e], ")");
}
CalcHourglassControlForElems(determ);
}
proc IntegrateStressForElems(sigxx, sigyy, sigzz, determ) {
forall k in Elems {
var b_x, b_y, b_z: 8*real;
var x_local, y_local, z_local: 8*real;
localizeNeighborNodes(k, x, x_local, y, y_local, z, z_local);
var fx_local, fy_local, fz_local: 8*real;
local {
/* Volume calculation involves extra work for numerical consistency. */
CalcElemShapeFunctionDerivatives(x_local, y_local, z_local,
b_x, b_y, b_z, determ[k]);
CalcElemNodeNormals(b_x, b_y, b_z, x_local, y_local, z_local);
SumElemStressesToNodeForces(b_x, b_y, b_z, sigxx[k], sigyy[k], sigzz[k],
fx_local, fy_local, fz_local);
}
for (noi, t) in elemToNodesTuple(k) {
fx[noi].add(fx_local[t]);
fy[noi].add(fy_local[t]);
fz[noi].add(fz_local[t]);
}
}
}
proc CalcHourglassControlForElems(determ) {
var dvdx, dvdy, dvdz, x8n, y8n, z8n: [Elems] 8*real;
forall eli in Elems {
/* Collect domain nodes to elem nodes */
var x1, y1, z1: 8*real;
localizeNeighborNodes(eli, x, x1, y, y1, z, z1);
var pfx, pfy, pfz: 8*real;
local {
/* load into temporary storage for FB Hour Glass control */
(dvdx[eli], dvdy[eli], dvdz[eli]) = CalcElemVolumeDerivative(x1, y1, z1);
}
x8n[eli] = x1;
y8n[eli] = y1;
z8n[eli] = z1;
determ[eli] = volo[eli] * v[eli];
}
/* Do a check for negative volumes */
forall e in Elems {
if v[e] <= 0.0 then
halt("can't have negative (or zero) volume. (in Hourglass, v[", e, "]=", v[e], ")");
}
if hgcoef > 0.0 {
CalcFBHourglassForceForElems(determ, x8n, y8n, z8n, dvdx, dvdy, dvdz);
}
}
const gammaCoef: 4*(8*real) = // WAS: [1..4, 1..8] real =
(( 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0),
( 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0),
( 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0),
(-1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0));
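// (the four Flanagan-Belytschko hourglass base vectors, one 8-entry row per
//  hourglass mode)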
/* Calculates the Flanagan-Belytschko anti-hourglass force. */
proc CalcFBHourglassForceForElems(determ, x8n, y8n, z8n, dvdx, dvdy, dvdz) {
/* compute the hourglass modes */
forall eli in Elems {
var hourgam: 8*(4*real);
const volinv = 1.0 / determ[eli];
var ss1, mass1, volume13: real;
var hgfx, hgfy, hgfz: 8*real;
var coefficient: real;
var xd1, yd1, zd1: 8*real;
localizeNeighborNodes(eli, xd, xd1, yd, yd1, zd, zd1);
/* TODO: Can we enable this local block? */
// local {
for i in 1..4 {
var hourmodx, hourmody, hourmodz: real;
// reduction
for j in 1..8 {
hourmodx += x8n[eli][j] * gammaCoef[i][j];
hourmody += y8n[eli][j] * gammaCoef[i][j];
hourmodz += z8n[eli][j] * gammaCoef[i][j];
}
for j in 1..8 {
hourgam[j][i] = gammaCoef[i][j] - volinv *
(dvdx[eli][j] * hourmodx +
dvdy[eli][j] * hourmody +
dvdz[eli][j] * hourmodz);
}
}
/* compute forces */
/* store forces into h arrays (force arrays) */
ss1 = ss[eli];
mass1 = elemMass[eli];
volume13 = cbrt(determ[eli]);
coefficient = - hgcoef * 0.01 * ss1 * mass1 / volume13;
CalcElemFBHourglassForce(xd1, yd1, zd1, hourgam, coefficient,
hgfx, hgfy, hgfz);
// } // end local
for (noi,i) in elemToNodesTuple(eli) {
fx[noi].add(hgfx[i]);
fy[noi].add(hgfy[i]);
fz[noi].add(hgfz[i]);
}
}
}
proc CalcAccelerationForNodes() {
forall noi in Nodes do local {
xdd[noi] = fx[noi].read() / nodalMass[noi];
ydd[noi] = fy[noi].read() / nodalMass[noi];
zdd[noi] = fz[noi].read() / nodalMass[noi];
}
}
proc ApplyAccelerationBoundaryConditionsForNodes() {
// TODO: Shouldn't we be able to write these as follows?
//
// xdd[XSym] = 0.0;
// ydd[YSym] = 0.0;
// zdd[ZSym] = 0.0;
forall x in XSym do xdd[x] = 0.0;
forall y in YSym do ydd[y] = 0.0;
forall z in ZSym do zdd[z] = 0.0;
}
proc CalcVelocityForNodes(dt: real, u_cut: real) {
forall i in Nodes do local {
var xdtmp = xd[i] + xdd[i] * dt,
ydtmp = yd[i] + ydd[i] * dt,
zdtmp = zd[i] + zdd[i] * dt;
if abs(xdtmp) < u_cut then xdtmp = 0.0;
if abs(ydtmp) < u_cut then ydtmp = 0.0;
if abs(zdtmp) < u_cut then zdtmp = 0.0;
xd[i] = xdtmp;
yd[i] = ydtmp;
zd[i] = zdtmp;
}
}
proc CalcPositionForNodes(dt: real) {
forall ijk in Nodes {
x[ijk] += xd[ijk] * dt;
y[ijk] += yd[ijk] * dt;
z[ijk] += zd[ijk] * dt;
}
}
// sungeun: Temporary array reused throughout
proc CalcLagrangeElements() {
var dxx, dyy, dzz: [Elems] real;
CalcKinematicsForElems(dxx, dyy, dzz, deltatime);
// element loop to do some stuff not included in the elemlib function.
forall k in Elems do local {
vdov[k] = dxx[k] + dyy[k] + dzz[k];
var vdovthird = vdov[k] / 3.0;
dxx[k] -= vdovthird;
dyy[k] -= vdovthird;
dzz[k] -= vdovthird;
}
// See if any volumes are negative, and take appropriate action.
forall e in Elems {
if vnew[e] <= 0.0 then
halt("can't have negative volume (vnew[", e, "]=", vnew[e], ")");
}
}
proc CalcKinematicsForElems(dxx, dyy, dzz, const dt: real) {
// loop over all elements
forall k in Elems {
var b_x, b_y, b_z: 8*real,
d: 6*real,
detJ: real;
//get nodal coordinates from global arrays and copy into local arrays
var x_local, y_local, z_local: 8*real;
localizeNeighborNodes(k, x, x_local, y, y_local, z, z_local);
//get nodal velocities from global arrays and copy into local arrays
var xd_local, yd_local, zd_local: 8*real;
localizeNeighborNodes(k, xd, xd_local, yd, yd_local, zd, zd_local);
var dt2 = 0.5 * dt; //wish this was local, too...
local {
//volume calculations
const volume = CalcElemVolume(x_local, y_local, z_local);
const relativeVolume = volume / volo[k];
vnew[k] = relativeVolume;
delv[k] = relativeVolume - v[k];
//set characteristic length
arealg[k] = CalcElemCharacteristicLength(x_local, y_local, z_local,
volume);
for i in 1..8 {
x_local[i] -= dt2 * xd_local[i];
y_local[i] -= dt2 * yd_local[i];
z_local[i] -= dt2 * zd_local[i];
}
CalcElemShapeFunctionDerivatives(x_local, y_local, z_local,
b_x, b_y, b_z, detJ);
CalcElemVelocityGradient(xd_local, yd_local, zd_local, b_x, b_y, b_z,
detJ, d);
}
// put velocity gradient quantities into their global arrays.
dxx[k] = d[1];
dyy[k] = d[2];
dzz[k] = d[3];
}
}
// sungeun: Temporary array reused throughout
/* velocity gradient */
var delv_xi, delv_eta, delv_zeta: [Elems] real;
/* position gradient */
var delx_xi, delx_eta, delx_zeta: [Elems] real;
proc CalcQForElems() {
// MONOTONIC Q option
/* Calculate velocity gradients */
CalcMonotonicQGradientsForElems(delv_xi, delv_eta, delv_zeta,
delx_xi, delx_eta, delx_zeta);
/* Transfer velocity gradients in the first order elements */
/* problem->commElements->Transfer(CommElements::monoQ) ; */
CalcMonotonicQForElems(delv_xi, delv_eta, delv_zeta,
delx_xi, delx_eta, delx_zeta);
/* Don't allow excessive artificial viscosity */
forall e in Elems {
if q[e] > qstop then
halt("Excessive artificial viscosity! (q[", e, "]=", q[e], ")");
}
}
// sungeun: Temporary array reused throughout
var vnewc: [MatElems] real;
/* Expose all of the variables needed for material evaluation */
proc ApplyMaterialPropertiesForElems() {
forall i in MatElems do vnewc[i] = vnew[i];
if eosvmin != 0.0 then
[c in vnewc] if c < eosvmin then c = eosvmin;
if eosvmax != 0.0 then
[c in vnewc] if c > eosvmax then c = eosvmax;
// old comment: The following loop should compute min/max reductions;
// currently, race-y
forall matelm in MatElems {
var vc = v[matelm];
if eosvmin != 0.0 && vc < eosvmin then vc = eosvmin;
if eosvmax != 0.0 && vc > eosvmax then vc = eosvmax;
if vc <= 0.0 {
writeln("Volume error (in ApplyMaterialProperiesForElems).");
exit(1);
}
}
EvalEOSForElems(vnewc);
}
proc UpdateVolumesForElems() {
forall i in Elems do local {
var tmpV = vnew[i];
if abs(tmpV-1.0) < v_cut then tmpV = 1.0;
v[i] = tmpV;
}
}
proc CalcMonotonicQGradientsForElems(delv_xi, delv_eta, delv_zeta,
delx_xi, delx_eta, delx_zeta) {
forall eli in Elems {
const ptiny = 1.0e-36;
var xl, yl, zl: 8*real;
localizeNeighborNodes(eli, x, xl, y, yl, z, zl);
var xvl, yvl, zvl: 8*real;
localizeNeighborNodes(eli, xd, xvl, yd, yvl, zd, zvl);
local {
const vol = volo[eli] * vnew[eli],
norm = 1.0 / (vol + ptiny);
var ax, ay, az, dxv, dyv, dzv: real;
const dxj = -0.25*((xl[1]+xl[2]+xl[6]+xl[5])-(xl[4]+xl[3]+xl[7]+xl[8])),
dyj = -0.25*((yl[1]+yl[2]+yl[6]+yl[5])-(yl[4]+yl[3]+yl[7]+yl[8])),
dzj = -0.25*((zl[1]+zl[2]+zl[6]+zl[5])-(zl[4]+zl[3]+zl[7]+zl[8])),
dxi = 0.25*((xl[2]+xl[3]+xl[7]+xl[6])-(xl[1]+xl[4]+xl[8]+xl[5])),
dyi = 0.25*((yl[2]+yl[3]+yl[7]+yl[6])-(yl[1]+yl[4]+yl[8]+yl[5])),
dzi = 0.25*((zl[2]+zl[3]+zl[7]+zl[6])-(zl[1]+zl[4]+zl[8]+zl[5])),
dxk = 0.25*((xl[5]+xl[6]+xl[7]+xl[8])-(xl[1]+xl[2]+xl[3]+xl[4])),
dyk = 0.25*((yl[5]+yl[6]+yl[7]+yl[8])-(yl[1]+yl[2]+yl[3]+yl[4])),
dzk = 0.25*((zl[5]+zl[6]+zl[7]+zl[8])-(zl[1]+zl[2]+zl[3]+zl[4]));
/* find delvk and delxk ( i cross j ) */
ax = dyi*dzj - dzi*dyj;
ay = dzi*dxj - dxi*dzj;
az = dxi*dyj - dyi*dxj;
delx_zeta[eli] = vol / sqrt(ax*ax + ay*ay + az*az + ptiny);
ax *= norm;
ay *= norm;
az *= norm;
dxv = 0.25*((xvl[5]+xvl[6]+xvl[7]+xvl[8])-(xvl[1]+xvl[2]+xvl[3]+xvl[4]));
dyv = 0.25*((yvl[5]+yvl[6]+yvl[7]+yvl[8])-(yvl[1]+yvl[2]+yvl[3]+yvl[4]));
dzv = 0.25*((zvl[5]+zvl[6]+zvl[7]+zvl[8])-(zvl[1]+zvl[2]+zvl[3]+zvl[4]));
delv_zeta[eli] = ax*dxv + ay*dyv + az*dzv;
/* find delxi and delvi ( j cross k ) */
ax = dyj*dzk - dzj*dyk;
ay = dzj*dxk - dxj*dzk;
az = dxj*dyk - dyj*dxk;
delx_xi[eli] = vol / sqrt(ax*ax + ay*ay + az*az + ptiny) ;
ax *= norm;
ay *= norm;
az *= norm;
dxv = 0.25*((xvl[2]+xvl[3]+xvl[7]+xvl[6])-(xvl[1]+xvl[4]+xvl[8]+xvl[5]));
dyv = 0.25*((yvl[2]+yvl[3]+yvl[7]+yvl[6])-(yvl[1]+yvl[4]+yvl[8]+yvl[5]));
dzv = 0.25*((zvl[2]+zvl[3]+zvl[7]+zvl[6])-(zvl[1]+zvl[4]+zvl[8]+zvl[5]));
delv_xi[eli] = ax*dxv + ay*dyv + az*dzv ;
/* find delxj and delvj ( k cross i ) */
ax = dyk*dzi - dzk*dyi;
ay = dzk*dxi - dxk*dzi;
az = dxk*dyi - dyk*dxi;
delx_eta[eli] = vol / sqrt(ax*ax + ay*ay + az*az + ptiny) ;
ax *= norm;
ay *= norm;
az *= norm;
dxv= -0.25*((xvl[1]+xvl[2]+xvl[6]+xvl[5])-(xvl[4]+xvl[3]+xvl[7]+xvl[8]));
dyv= -0.25*((yvl[1]+yvl[2]+yvl[6]+yvl[5])-(yvl[4]+yvl[3]+yvl[7]+yvl[8]));
dzv= -0.25*((zvl[1]+zvl[2]+zvl[6]+zvl[5])-(zvl[4]+zvl[3]+zvl[7]+zvl[8]));
delv_eta[eli] = ax*dxv + ay*dyv + az*dzv ;
} /* local */
} /* forall eli */
}
proc CalcMonotonicQForElems(delv_xi, delv_eta, delv_zeta,
delx_xi, delx_eta, delx_zeta) {
//got rid of call through to "CalcMonotonicQRegionForElems"
forall i in MatElems {
const ptiny = 1.0e-36;
const bcMask = elemBC[i];
var norm, delvm, delvp: real;
/* phixi */
norm = 1.0 / (delv_xi[i] + ptiny);
select bcMask & XI_M {
when 0 do delvm = delv_xi[lxim(i)];
when XI_M_SYMM do delvm = delv_xi[i];
when XI_M_FREE do delvm = 0.0;
}
select bcMask & XI_P {
when 0 do delvp = delv_xi[lxip(i)];
when XI_P_SYMM do delvp = delv_xi[i];
when XI_P_FREE do delvp = 0.0;
}
delvm *= norm;
delvp *= norm;
var phixi = 0.5 * (delvm + delvp);
delvm *= monoq_limiter_mult;
delvp *= monoq_limiter_mult;
if delvm < phixi then phixi = delvm;
if delvp < phixi then phixi = delvp;
if phixi < 0 then phixi = 0.0;
if phixi > monoq_max_slope then phixi = monoq_max_slope;
/* phieta */
norm = 1.0 / (delv_eta[i] + ptiny);
select bcMask & ETA_M {
when 0 do delvm = delv_eta[letam[i]];
when ETA_M_SYMM do delvm = delv_eta[i];
when ETA_M_FREE do delvm = 0.0;
}
select bcMask & ETA_P {
when 0 do delvp = delv_eta[letap[i]];
when ETA_P_SYMM do delvp = delv_eta[i];
when ETA_P_FREE do delvp = 0.0;
}
delvm = delvm * norm;
delvp = delvp * norm;
var phieta = 0.5 * (delvm + delvp);
delvm *= monoq_limiter_mult;
delvp *= monoq_limiter_mult;
if delvm < phieta then phieta = delvm;
if delvp < phieta then phieta = delvp;
if phieta < 0.0 then phieta = 0.0;
if phieta > monoq_max_slope then phieta = monoq_max_slope;
/* phizeta */
norm = 1.0 / (delv_zeta[i] + ptiny);
select bcMask & ZETA_M {
when 0 do delvm = delv_zeta[lzetam[i]];
when ZETA_M_SYMM do delvm = delv_zeta[i];
when ZETA_M_FREE do delvm = 0.0;
}
select bcMask & ZETA_P {
when 0 do delvp = delv_zeta[lzetap[i]];
when ZETA_P_SYMM do delvp = delv_zeta[i];
when ZETA_P_FREE do delvp = 0.0;
}
delvm = delvm * norm;
delvp = delvp * norm;
var phizeta = 0.5 * (delvm + delvp);
delvm *= monoq_limiter_mult;
delvp *= monoq_limiter_mult;
if delvm < phizeta then phizeta = delvm;
if delvp < phizeta then phizeta = delvp;
if phizeta < 0.0 then phizeta = 0.0;
if phizeta > monoq_max_slope then phizeta = monoq_max_slope;
/* Remove length scale */
var qlin, qquad: real;
if vdov[i] > 0.0 {
qlin = 0.0;
qquad = 0.0;
} else {
var delvxxi = delv_xi[i] * delx_xi[i],
delvxeta = delv_eta[i] * delx_eta[i],
delvxzeta = delv_zeta[i] * delx_zeta[i];
if delvxxi > 0.0 then delvxxi = 0.0;
if delvxeta > 0.0 then delvxeta = 0.0;
if delvxzeta > 0.0 then delvxzeta = 0.0;
const rho = elemMass[i] / (volo[i] * vnew[i]);
qlin = -qlc_monoq * rho *
( delvxxi * (1.0 - phixi) +
delvxeta * (1.0 - phieta) +
delvxzeta * (1.0 - phizeta));
qquad = qqc_monoq * rho *
( delvxxi**2 * (1.0 - phixi**2) +
delvxeta**2 * (1.0 - phieta**2) +
delvxzeta**2 * (1.0 - phizeta**2));
}
qq[i] = qquad;
ql[i] = qlin;
}
}
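// Equation-of-state driver: snapshot the material elements' state, compute
// the full-step and half-step compressions, apply the eosvmin/eosvmax
// clamps, then update p, e, and q via CalcEnergyForElems and ss via
// CalcSoundSpeedForElems.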
proc EvalEOSForElems(vnewc) {
const rho0 = refdens;
var e_old, delvc, p_old, q_old, compression, compHalfStep,
qq_old, ql_old, work, p_new, e_new, q_new, bvc, pbvc: [Elems] real;
/* compress data, minimal set */
forall i in MatElems {
e_old[i] = e[i];
delvc[i] = delv[i];
p_old[i] = p[i];
q_old[i] = q[i];
qq_old[i] = qq[i];
ql_old[i] = ql[i];
}
//
// TODO: Uncomment local once sparse domain is distributed
//
forall i in Elems /* do local */ {
compression[i] = 1.0 / vnewc[i] - 1.0;
const vchalf = vnewc[i] - delvc[i] * 0.5;
compHalfStep[i] = 1.0 / vchalf - 1.0;
}
/* Check for v > eosvmax or v < eosvmin */
// (note: I think this was already checked for in calling function!)
if eosvmin != 0.0 {
forall i in Elems {
if vnewc[i] <= eosvmin then compHalfStep[i] = compression[i];
}
}
if eosvmax != 0.0 {
forall i in Elems {
if vnewc[i] >= eosvmax {
p_old[i] = 0.0;
compression[i] = 0.0;
compHalfStep[i] = 0.0;
}
}
}
CalcEnergyForElems(p_new, e_new, q_new, bvc, pbvc,
p_old, e_old, q_old, compression, compHalfStep,
vnewc, work, delvc, qq_old, ql_old);
forall i in MatElems {
p[i] = p_new[i];
e[i] = e_new[i];
q[i] = q_new[i];
}
CalcSoundSpeedForElems(vnewc, rho0, e_new, p_new, pbvc, bvc);
}
proc CalcEnergyForElems(p_new, e_new, q_new, bvc, pbvc,
p_old, e_old, q_old, compression, compHalfStep,
vnewc, work, delvc, qq_old, ql_old) {
// TODO: might need to move these consts into foralls or global
// Otherwise, they live on Locale0 and everyone else has to do
// remote reads. OR: Check if they're remote value forwarded.
const rho0 = refdens;
const sixth = 1.0 / 6.0;
var pHalfStep: [MatElems] real;
forall i in Elems {
e_new[i] = e_old[i] - 0.5 * delvc[i] * (p_old[i] + q_old[i])
+ 0.5 * work[i];
if e_new[i] < emin then e_new[i] = emin;
}
CalcPressureForElems(pHalfStep, bvc, pbvc, e_new, compHalfStep,
vnewc, pmin, p_cut, eosvmax);
forall i in Elems {
const vhalf = 1.0 / (1.0 + compHalfStep[i]);
if delvc[i] > 0.0 {
q_new[i] = 0.0;
} else {
var ssc = (pbvc[i] * e_new[i] + vhalf**2 * bvc[i] * pHalfStep[i]) / rho0;
if ssc <= 0.0 then ssc = 0.333333e-36;
else ssc = sqrt(ssc);
q_new[i] = ssc * ql_old[i] + qq_old[i];
}
e_new[i] += 0.5 * delvc[i]
* (3.0*(p_old[i] + q_old[i]) - 4.0*(pHalfStep[i] + q_new[i]));
}
forall i in Elems {
e_new[i] += 0.5 * work[i];
if abs(e_new[i]) < e_cut then e_new[i] = 0.0;
if e_new[i] < emin then e_new[i] = emin;
}
CalcPressureForElems(p_new, bvc, pbvc, e_new, compression, vnewc, pmin,
p_cut, eosvmax);
forall i in Elems {
var q_tilde:real;
if delvc[i] > 0.0 {
q_tilde = 0.0;
} else {
var ssc = (pbvc[i] * e_new[i] + vnewc[i]**2 * bvc[i] * p_new[i] ) / rho0;
if ssc <= 0.0 then ssc = 0.333333e-36;
else ssc = sqrt(ssc);
q_tilde = ssc * ql_old[i] + qq_old[i];
}
e_new[i] -= (7.0*(p_old[i] + q_old[i])
- 8.0*(pHalfStep[i] + q_new[i])
+ (p_new[i] + q_tilde)) * delvc[i] * sixth;
if abs(e_new[i]) < e_cut then e_new[i] = 0.0;
if e_new[i] < emin then e_new[i] = emin;
}
CalcPressureForElems(p_new, bvc, pbvc, e_new, compression, vnewc, pmin,
p_cut, eosvmax);
//
// TODO: Uncomment local once sparse domain is distributed
//
forall i in Elems /* do local */ {
if delvc[i] <= 0.0 {
var ssc = (pbvc[i] * e_new[i] + vnewc[i]**2 * bvc[i] * p_new[i] ) / rho0;
if ssc <= 0.0 then ssc = 0.333333e-36;
else ssc = sqrt(ssc);
q_new[i] = ssc * ql_old[i] + qq_old[i];
if abs(q_new[i]) < q_cut then q_new[i] = 0.0;
}
}
}
proc CalcSoundSpeedForElems(vnewc, rho0:real, enewc, pnewc, pbvc, bvc) {
// TODO: Open question: If we had multiple materials, should (a) ss
// be zeroed and accumulated into, and (b) updated atomically to
// avoid losing updates? (Jeff will go back and think on this)
//
forall i in MatElems {
var ssTmp = (pbvc[i] * enewc[i] + vnewc[i]**2 * bvc[i] * pnewc[i]) / rho0;
if ssTmp <= 1.111111e-36 then ssTmp = 1.111111e-36;
ss[i] = sqrt(ssTmp);
}
}
iter elemToNodes(elem) {
for i in 1..nodesPerElem do
yield elemToNode[elem][i];
}
iter elemToNodesTuple(e) {
for i in 1..nodesPerElem do
yield (elemToNode[e][i], i);
}
proc deprint(title:string, x:[?D] real, y:[D] real, z:[D] real) {
writeln(title);
for i in D do
writef("%3i: %3.4er %3.4er %3.4er\n",
if use3DRepresentation then idx3DTo1D(i, D.dim(1).size) else i,
x[i], y[i], z[i]);
}
proc deprintatomic(title:string, x:[?D] atomic real, y:[] atomic real, z:[] atomic real) {
writeln(title);
for i in D do
writef("%3i: %3.4er %3.4er %3.4er\n",
if use3DRepresentation then idx3DTo1D(i, D.dim(1).size) else i,
x[i].peek(), y[i].peek(), z[i].peek());
}