mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1696 lines
		
	
	
		
			50 KiB
		
	
	
	
		
			Chapel
		
	
	
	
	
	
			
		
		
	
	
			1696 lines
		
	
	
		
			50 KiB
		
	
	
	
		
			Chapel
		
	
	
	
	
	
| /*
 | |
|   Derived from the DARPA/Livermore Unstructured Lagrangian Explicit
 | |
|   Shock Hydrodynamics (LULESH)
 | |
|   https://computation.llnl.gov/casc/ShockHydro/
 | |
| 
 | |
|   Original port to Chapel by Brandon Holt (8/2011).  Further
 | |
|   improvements for the sake of performance and/or generality made by
 | |
|   Sung-Eun Choi (12/2011, 11/2012), Jeff Keasler (3/2012), and Brad
 | |
|   Chamberlain (3-4,9-11/2012, 2/2013).
 | |
| 
 | |
| 
 | |
|   Notes on the Initial Implementation
 | |
|   -----------------------------------
 | |
|    
 | |
|   This implementation was designed to mirror the overall structure of
 | |
|   the C++ Lulesh but use Chapel constructs where they can help make
 | |
|   the code more readable, easier to maintain, or more
 | |
|   'elegant'. Function names are preserved for the most part, with some
 | |
|   additional helper functions, and original comments from the C++ code
 | |
|   are interspersed approximately where they belong to give an idea of
 | |
|   how the two codes line up. One major difference for this Chapel
 | |
|   version is the use of a number of module-level variables and
 | |
|   constants.
 | |
| 
 | |
| 
 | |
|   Status:
 | |
| 
 | |
|   This code remains a work-in-progress as we gain further experience
 | |
|   with it.  Proposed improvements are noted in the README in this
 | |
|   directory and (in some cases) in TODO comments in the code.
 | |
| 
 | |
|  */
 | |
| 
 | |
| 
 | |
| 
 | |
| use Time,       // to get timing routines for benchmarking
 | |
|     BlockDist;  // for block-distributed arrays
 | |
| 
 | |
| use luleshInit;   // initialization code for data set
 | |
| 
 | |
| /* The configuration parameters for lulesh.  These can be set on the
 | |
|    compiler command line using -s<paramName>=<value>.  For example,
 | |
| 
 | |
|      chpl -suseBlockDist=true
 | |
| 
 | |
|    useBlockDist : says whether or not to block-distribute the arrays.
 | |
|                   The default is based on the value of CHPL_COMM, as
 | |
|                   an indication of whether this is a single- or multi-
 | |
|                   locale execution.
 | |
| 
 | |
|    use3DRepresentation : indicates whether the element node arrays
 | |
|                          should be stored using a 3D representation
 | |
|                          (limiting the execution to cube inputs) or
 | |
|                          the more general 1D representation (supporting
 | |
|                          arbitrary data sets).
 | |
| 
 | |
|    useSparseMaterials : indicates whether sparse domains/arrays should be
 | |
|                         used to represent the materials.  Sparse domains
 | |
|                         are more realistic in that they permit an arbitrary
 | |
|                         subset of the problem space to store a material.
 | |
|                         Dense domains are sufficient for LULESH since there's
 | |
|                         an assumption that the material spans all cells.
 | |
| 
 | |
|    printWarnings : prints performance-oriented warnings to prevent
 | |
|                    surprises.
 | |
| */
 | |
|    
 | |
| config param useBlockDist = (CHPL_COMM != "none"),
 | |
|              use3DRepresentation = false,
 | |
|              useSparseMaterials = true,
 | |
|              printWarnings = true;
 | |
| 
 | |
| 
 | |
| //
 | |
| // Sanity check to ensure that input files aren't used with the 3D
 | |
| // representation
 | |
| //
 | |
| if (use3DRepresentation && (luleshInit.filename != "")) then
 | |
|   halt("The 3D representation does not support reading input from files");
 | |
| 
 | |
| 
 | |
| /* Configuration constants: Override defaults on executable's command-line */
 | |
| 
 | |
| config const initialEnergy = 3.948746e+7;            // initial energy value
 | |
| 
 | |
| 
 | |
| config const showProgress = false,   // print time and dt values on each step
 | |
|              debug = false,          // print various debug info
 | |
|              doTiming = true,        // time the main timestep loop
 | |
|              printCoords = true;     // print the final computed coordinates
 | |
| 
 | |
| 
 | |
| /* Compile-time constants */
 | |
| 
 | |
| param XI_M        = 0x003,
 | |
|       XI_M_SYMM   = 0x001,
 | |
|       XI_M_FREE   = 0x002,
 | |
| 
 | |
|       XI_P        = 0x00c,
 | |
|       XI_P_SYMM   = 0x004,
 | |
|       XI_P_FREE   = 0x008,
 | |
| 
 | |
|       ETA_M       = 0x030,
 | |
|       ETA_M_SYMM  = 0x010,
 | |
|       ETA_M_FREE  = 0x020,
 | |
| 
 | |
|       ETA_P       = 0x0c0,
 | |
|       ETA_P_SYMM  = 0x040,
 | |
|       ETA_P_FREE  = 0x080,
 | |
| 
 | |
|       ZETA_M      = 0x300,
 | |
|       ZETA_M_SYMM = 0x100,
 | |
|       ZETA_M_FREE = 0x200,
 | |
| 
 | |
|       ZETA_P      = 0xc00,
 | |
|       ZETA_P_SYMM = 0x400,
 | |
|       ZETA_P_FREE = 0x800;
 | |
| 
 | |
| 
 | |
| /* Set up the problem size */
 | |
| 
 | |
| const (numElems, numNodes) = initProblemSize();
 | |
| 
 | |
| 
 | |
| /* Declare abstract problem domains */
 | |
| 
 | |
| const ElemSpace = if use3DRepresentation
 | |
|                     then {0..#elemsPerEdge, 0..#elemsPerEdge, 0..#elemsPerEdge}
 | |
|                     else {0..#numElems},
 | |
|       NodeSpace = if use3DRepresentation
 | |
|                     then {0..#nodesPerEdge, 0..#nodesPerEdge, 0..#nodesPerEdge}
 | |
|                     else {0..#numNodes};
 | |
| 
 | |
| 
 | |
| /* Declare the (potentially distributed) problem domains */
 | |
| 
 | |
| const Elems = if useBlockDist then ElemSpace dmapped Block(ElemSpace)
 | |
|                               else ElemSpace,
 | |
|       Nodes = if useBlockDist then NodeSpace dmapped Block(NodeSpace)
 | |
|                               else NodeSpace;
 | |
| 
 | |
| 
 | |
| /* The coordinates */
 | |
| 
 | |
| var x, y, z: [Nodes] real;
 | |
|                               
 | |
| 
 | |
| /* The number of nodes per element.  In a rank-independent version,
 | |
|    this could be written 2**rank */
 | |
| 
 | |
| param nodesPerElem = 8;
 | |
| 
 | |
|                                  
 | |
| // We could name this, but chose not to since it doesn't add that much clarity
 | |
| //
 | |
| // const elemNeighbors = 1..nodesPerElem;
 | |
| 
 | |
| 
 | |
| /* The element-to-node mapping */
 | |
| 
 | |
| var elemToNode: [Elems] nodesPerElem*index(Nodes);
 | |
| 
 | |
| 
 | |
| /* the Greek variables */
 | |
| 
 | |
| var lxim, lxip, letam, letap, lzetam, lzetap: [Elems] index(Elems);
 | |
| 
 | |
| 
 | |
| /* the X, Y, Z Symmetry values */
 | |
| 
 | |
| var XSym, YSym, ZSym: sparse subdomain(Nodes);
 | |
| 
 | |
| 
 | |
| 
 | |
| /* Constants */
 | |
| 
 | |
| const u_cut = 1.0e-7,           /* velocity tolerance */
 | |
|       hgcoef = 3.0,             /* hourglass control */
 | |
|       qstop = 1.0e+12,          /* excessive q indicator */
 | |
|       monoq_max_slope = 1.0,
 | |
|       monoq_limiter_mult = 2.0,
 | |
|       e_cut = 1.0e-7,           /* energy tolerance */
 | |
|       p_cut = 1.0e-7,           /* pressure tolerance */
 | |
|       ss4o3 = 4.0/3.0,
 | |
|       q_cut = 1.0e-7,           /* q tolerance */
 | |
|       v_cut = 1.0e-10,          /* relative volume tolerance */
 | |
|       qlc_monoq = 0.5,          /* linear term coef for q */
 | |
|       qqc_monoq = 2.0/3.0,      /* quadratic term coef for q */
 | |
|       qqc = 2.0, 
 | |
|       qqc2 = 64.0 * qqc**2,
 | |
|       eosvmax = 1.0e+9,
 | |
|       eosvmin = 1.0e-9,
 | |
|       pmin = 0.0,               /* pressure floor */
 | |
|       emin = -1.0e+15,          /* energy floor */
 | |
|       dvovmax = 0.1,            /* maximum allowable volume change */
 | |
|       refdens = 1.0,            /* reference density */
 | |
| 
 | |
|       deltatimemultlb = 1.1,
 | |
|       deltatimemultub = 1.2,
 | |
|       dtmax = 1.0e-2;           /* maximum allowable time increment */
 | |
| 
 | |
|                               
 | |
| config const stoptime = 1.0e-2,      /* end time for simulation */
 | |
|              maxcycles = max(int),   /* max number of cycles to simulate */
 | |
|              dtfixed = -1.0e-7;      /* fixed time increment */
 | |
| 
 | |
| 
 | |
| /* The list of material elements */
 | |
| 
 | |
| const MatElems: MatElemsType = if useSparseMaterials then enumerateMatElems()
 | |
|                                                      else Elems;
 | |
| 
 | |
| 
 | |
| proc MatElemsType type {
 | |
|   if useSparseMaterials {
 | |
|     if (printWarnings && useBlockDist && numLocales > 1) then
 | |
|       writeln("WARNING: The LULESH Material Elements (MatElems) are not yet\n",
 | |
|               "         distributed, so result in excessive memory use on,\n",
 | |
|               "         and communication with, locale 0\n");
 | |
|     return sparse subdomain(Elems);
 | |
|   } else
 | |
|     return Elems.type;
 | |
| }
 | |
| 
 | |
| iter enumerateMatElems() {
 | |
|   if (printWarnings && useBlockDist && numLocales > 1) then
 | |
|     writeln("WARNING: generation of matrix elements is serial and\n",
 | |
|             "         unlikely to scale");
 | |
|   for i in Elems do
 | |
|     yield i;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* Element fields */
 | |
| 
 | |
| var elemBC: [Elems] int,
 | |
| 
 | |
|     e: [Elems] real, // energy
 | |
|     p: [Elems] real, // pressure
 | |
| 
 | |
|     q: [Elems] real, // q
 | |
|     ql: [Elems] real, // linear term for q
 | |
|     qq: [Elems] real, // quadratic term for q
 | |
| 
 | |
|     v:    [Elems] real = 1.0, //relative volume
 | |
|     vnew: [Elems] real,
 | |
| 
 | |
|     volo: [Elems] real, // reference volume
 | |
|     delv: [Elems] real, // m_vnew - m_v
 | |
|     vdov: [Elems] real, // volume derivative over volume
 | |
| 
 | |
|     arealg: [Elems] real, // elem characteristic length
 | |
| 
 | |
|     ss: [Elems] real, // "sound speed"
 | |
| 
 | |
|     elemMass: [Elems] real; // mass
 | |
| 
 | |
| 
 | |
| /* Nodal fields */
 | |
| 
 | |
| var xd, yd, zd: [Nodes] real, // velocities
 | |
| 
 | |
|     xdd, ydd, zdd: [Nodes] real, // acceleration
 | |
| 
 | |
|     fx, fy, fz: [Nodes] atomic real, // forces
 | |
| 
 | |
|     nodalMass: [Nodes] real; // mass
 | |
| 
 | |
| 
 | |
| /* Parameters */
 | |
| 
 | |
| var time = 0.0,          // current time
 | |
|     deltatime = 1.0e-7,  // variable time increment
 | |
|     dtcourant = 1.0e20,  // courant constraint
 | |
|     dthydro = 1.0e20,    // volume change constraint
 | |
| 
 | |
|     cycle = 0;           // iteration count for simulation
 | |
| 
 | |
| 
 | |
| proc main() {
 | |
|   if debug then writeln("Lulesh -- Problem Size = ", numElems);
 | |
| 
 | |
|   initLulesh();
 | |
| 
 | |
|   var st: real;
 | |
|   if doTiming then st = getCurrentTime();
 | |
|   while (time < stoptime && cycle < maxcycles) {
 | |
|     const iterTime = if showProgress then getCurrentTime() else 0.0;
 | |
| 
 | |
|     TimeIncrement();
 | |
| 
 | |
|     LagrangeLeapFrog();
 | |
| 
 | |
|     if debug {
 | |
|       deprintatomic("[[ Forces ]]", fx, fy, fz);
 | |
|       deprint("[[ Positions ]]", x, y, z);
 | |
|       deprint("[[ p, e, q ]]", p, e, q);
 | |
|     }
 | |
|     if showProgress then
 | |
|       writef("time = %er, dt=%er, %s", time, deltatime,
 | |
|              if doTiming then ", elapsed = " + (getCurrentTime()-iterTime) +"\n"
 | |
|                          else "\n");
 | |
|   }
 | |
|   if (cycle == maxcycles) {
 | |
|     writeln("Stopped early due to reaching maxnumsteps");
 | |
|   }
 | |
|   if doTiming {
 | |
|     const et = getCurrentTime();
 | |
|     writeln("Total Time: ", et-st);
 | |
|     writeln("Time/Cycle: ", (et-st)/cycle);
 | |
|   }
 | |
|   writeln("Number of cycles: ", cycle);
 | |
| 
 | |
|   if printCoords {
 | |
|     var outfile = open("coords.out", iomode.cw);
 | |
|     var writer = outfile.writer();
 | |
|     var fmtstr = if debug then "%1.9re %1.9er %1.9er\n" 
 | |
|                           else "%1.4er %1.4er %1.4er\n";
 | |
|     for i in Nodes do
 | |
|       writer.writef(fmtstr, x[i], y[i], z[i]);
 | |
|     writer.close();
 | |
|     outfile.close();
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /* Initialization functions */
 | |
| 
 | |
| proc initLulesh() {
 | |
|   // initialize the coordinates
 | |
|   initCoordinates(x,y,z);
 | |
| 
 | |
|   // initialize the element to node mapping
 | |
|   initElemToNodeMapping(elemToNode);
 | |
| 
 | |
|   // initialize the greek symbols
 | |
|   initGreekVars(lxim, lxip, letam, letap, lzetam, lzetap);
 | |
| 
 | |
|   // initialize the symmetry plane locations
 | |
|   initXSyms(XSym);
 | |
|   initYSyms(YSym);
 | |
|   initZSyms(ZSym);
 | |
| 
 | |
|   /* embed hexehedral elements in nodal point lattice */
 | |
|   //calculated on the fly using: elemToNodes(i: index(Elems)): index(Nodes)
 | |
| 
 | |
|   // initialize the masses
 | |
|   initMasses();
 | |
| 
 | |
|   // initialize the boundary conditions
 | |
|   const octantCorner = initBoundaryConditions();
 | |
| 
 | |
|   // deposit the energy for Sedov Problem
 | |
|   e[octantCorner] = initialEnergy;
 | |
| }
 | |
| 
 | |
| 
 | |
| proc initMasses() {
 | |
|   // This is a temporary array used to accumulate masses in parallel
 | |
|   // without losing updates by using 'atomic' variables
 | |
|   var massAccum: [Nodes] atomic real;
 | |
| 
 | |
|   forall eli in Elems {
 | |
|     var x_local, y_local, z_local: 8*real;
 | |
|     localizeNeighborNodes(eli, x, x_local, y, y_local, z, z_local);
 | |
| 
 | |
|     const volume = CalcElemVolume(x_local, y_local, z_local);
 | |
|     volo[eli] = volume;
 | |
|     elemMass[eli] = volume;
 | |
| 
 | |
|     for neighbor in elemToNodes[eli] do
 | |
|       massAccum[neighbor].add(volume);
 | |
|   }
 | |
| 
 | |
|   // When we're done, copy the accumulated masses into nodalMass, at
 | |
|   // which point the massAccum array can go away (and will at the
 | |
|   // procedure's return
 | |
| 
 | |
|   forall i in Nodes do
 | |
|     nodalMass[i] = massAccum[i].read() / 8.0;
 | |
| 
 | |
|   if debug {
 | |
|     writeln("ElemMass:");
 | |
|     for mass in elemMass do writeln(mass);
 | |
| 
 | |
|     writeln("NodalMass:");
 | |
|     for mass in nodalMass do writeln(mass);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc initBoundaryConditions() {
 | |
|   var surfaceNode: [Nodes] int;
 | |
| 
 | |
|   forall n in XSym do
 | |
|     surfaceNode[n] = 1;
 | |
|   forall n in YSym do
 | |
|     surfaceNode[n] = 1;
 | |
|   forall n in ZSym do
 | |
|     surfaceNode[n] = 1;
 | |
| 
 | |
|   forall e in Elems do {
 | |
|     var mask: int;
 | |
|     for i in 1..nodesPerElem do
 | |
|       mask += surfaceNode[elemToNode[e][i]] << (i-1);
 | |
| 
 | |
|     // TODO: make an inlined function for this little idiom? (and below)
 | |
| 
 | |
|     if ((mask & 0x0f) == 0x0f) then elemBC[e] |= ZETA_M_SYMM;
 | |
|     if ((mask & 0xf0) == 0xf0) then elemBC[e] |= ZETA_P_SYMM;
 | |
|     if ((mask & 0x33) == 0x33) then elemBC[e] |= ETA_M_SYMM;
 | |
|     if ((mask & 0xcc) == 0xcc) then elemBC[e] |= ETA_P_SYMM;
 | |
|     if ((mask & 0x99) == 0x99) then elemBC[e] |= XI_M_SYMM;
 | |
|     if ((mask & 0x66) == 0x66) then elemBC[e] |= XI_P_SYMM;
 | |
|   }
 | |
| 
 | |
| 
 | |
|   //
 | |
|   // We find the octant corner by looking for the element with
 | |
|   // all three SYMM flags set, which will have the largest
 | |
|   // integral value.  Thus, we can use a maxloc to identify it.
 | |
|   //
 | |
|   const (check, loc) = maxloc reduce zip(elemBC, Elems);
 | |
| 
 | |
|   if debug then writeln("Found the octant corner at: ", loc);
 | |
| 
 | |
|   if (check != (XI_M_SYMM | ETA_M_SYMM | ZETA_M_SYMM)) then
 | |
|     halt("maxloc got a value of ", check, " at loc ", loc);
 | |
| 
 | |
|   // TODO: This is an example of an array that, in a distributed
 | |
|   // memory code, would typically be completely local and only storing
 | |
|   // the local nodes owned by the locale -- noting that some nodes
 | |
|   // are logically owned by multiple locales and therefore would 
 | |
|   // redundantly be stored in both locales' surfaceNode arrays -- it's
 | |
|   // essentially local scratchspace that does not need to be communicated
 | |
|   // or kept coherent across locales.
 | |
|   //
 | |
| 
 | |
|   surfaceNode = 0;
 | |
| 
 | |
|   /* the free surfaces */
 | |
| 
 | |
|   var freeSurface: sparse subdomain(Nodes);
 | |
| 
 | |
|   // initialize the free surface
 | |
|   initFreeSurface(freeSurface);
 | |
| 
 | |
|   forall n in freeSurface do
 | |
|     surfaceNode[n] = 1;
 | |
| 
 | |
|   forall e in Elems do {
 | |
|     var mask: int;
 | |
|     for i in 1..nodesPerElem do
 | |
|       mask += surfaceNode[elemToNode[e][i]] << (i-1);
 | |
| 
 | |
|     if ((mask & 0x0f) == 0x0f) then elemBC[e] |= ZETA_M_FREE;
 | |
|     if ((mask & 0xf0) == 0xf0) then elemBC[e] |= ZETA_P_FREE;
 | |
|     if ((mask & 0x33) == 0x33) then elemBC[e] |= ETA_M_FREE;
 | |
|     if ((mask & 0xcc) == 0xcc) then elemBC[e] |= ETA_P_FREE;
 | |
|     if ((mask & 0x99) == 0x99) then elemBC[e] |= XI_M_FREE;
 | |
|     if ((mask & 0x66) == 0x66) then elemBC[e] |= XI_P_FREE;
 | |
|   }
 | |
| 
 | |
|   if debug {
 | |
|     writeln("elemBC:");
 | |
|     for b in elemBC do writeln(b);
 | |
|   }
 | |
| 
 | |
|   return loc;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* Helper functions */
 | |
| 
 | |
| inline proc localizeNeighborNodes(eli: index(Elems),
 | |
|                                   x: [] real, ref x_local: 8*real,
 | |
|                                   y: [] real, ref y_local: 8*real,
 | |
|                                   z: [] real, ref z_local: 8*real) {
 | |
| 
 | |
|   for i in 1..nodesPerElem {
 | |
|     const noi = elemToNode[eli][i];
 | |
| 
 | |
|     x_local[i] = x[noi];
 | |
|     y_local[i] = y[noi];
 | |
|     z_local[i] = z[noi];
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline proc TripleProduct(x1, y1, z1, x2, y2, z2, x3, y3, z3) {
 | |
|   return x1*(y2*z3 - z2*y3) + x2*(z1*y3 - y1*z3) + x3*(y1*z2 - z1*y2);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcElemVolume(x, y, z) {
 | |
|   const dx61 = x[7] - x[2],
 | |
|         dy61 = y[7] - y[2],
 | |
|         dz61 = z[7] - z[2],
 | |
| 
 | |
|         dx70 = x[8] - x[1],
 | |
|         dy70 = y[8] - y[1],
 | |
|         dz70 = z[8] - z[1],
 | |
| 
 | |
|         dx63 = x[7] - x[4],
 | |
|         dy63 = y[7] - y[4],
 | |
|         dz63 = z[7] - z[4],
 | |
| 
 | |
|         dx20 = x[3] - x[1],
 | |
|         dy20 = y[3] - y[1],
 | |
|         dz20 = z[3] - z[1],
 | |
| 
 | |
|         dx50 = x[6] - x[1],
 | |
|         dy50 = y[6] - y[1],
 | |
|         dz50 = z[6] - z[1],
 | |
| 
 | |
|         dx64 = x[7] - x[5],
 | |
|         dy64 = y[7] - y[5],
 | |
|         dz64 = z[7] - z[5],
 | |
| 
 | |
|         dx31 = x[4] - x[2],
 | |
|         dy31 = y[4] - y[2],
 | |
|         dz31 = z[4] - z[2],
 | |
| 
 | |
|         dx72 = x[8] - x[3],
 | |
|         dy72 = y[8] - y[3],
 | |
|         dz72 = z[8] - z[3],
 | |
| 
 | |
|         dx43 = x[5] - x[4],
 | |
|         dy43 = y[5] - y[4],
 | |
|         dz43 = z[5] - z[4],
 | |
| 
 | |
|         dx57 = x[6] - x[8],
 | |
|         dy57 = y[6] - y[8],
 | |
|         dz57 = z[6] - z[8],
 | |
| 
 | |
|         dx14 = x[2] - x[5],
 | |
|         dy14 = y[2] - y[5],
 | |
|         dz14 = z[2] - z[5],
 | |
| 
 | |
|         dx25 = x[3] - x[6],
 | |
|         dy25 = y[3] - y[6],
 | |
|         dz25 = z[3] - z[6];
 | |
| 
 | |
|   const volume = TripleProduct(dx31 + dx72, dx63, dx20,
 | |
|                                dy31 + dy72, dy63, dy20,
 | |
|                                dz31 + dz72, dz63, dz20) +
 | |
|                  TripleProduct(dx43 + dx57, dx64, dx70,
 | |
|                                dy43 + dy57, dy64, dy70,
 | |
|                                dz43 + dz57, dz64, dz70) +
 | |
|                  TripleProduct(dx14 + dx25, dx61, dx50,
 | |
|                                dy14 + dy25, dy61, dy50,
 | |
|                                dz14 + dz25, dz61, dz50);
 | |
| 
 | |
|   return volume / 12.0;
 | |
| }
 | |
| 
 | |
| proc InitStressTermsForElems(p, q, sigxx, sigyy, sigzz: [?D] real) {
 | |
|   forall i in D {
 | |
|     sigxx[i] = -p[i] - q[i];
 | |
|     sigyy[i] = -p[i] - q[i];
 | |
|     sigzz[i] = -p[i] - q[i];
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcElemShapeFunctionDerivatives(x: 8*real, y: 8*real, z: 8*real, 
 | |
|                                       ref b_x: 8*real,
 | |
|                                       ref b_y: 8*real,
 | |
|                                       ref b_z: 8*real, 
 | |
|                                       ref volume: real) {
 | |
| 
 | |
|   const fjxxi = .125 * ((x[7]-x[1]) + (x[6]-x[4]) - (x[8]-x[2]) - (x[5]-x[3])),
 | |
|         fjxet = .125 * ((x[7]-x[1]) - (x[6]-x[4]) + (x[8]-x[2]) - (x[5]-x[3])),
 | |
|         fjxze = .125 * ((x[7]-x[1]) + (x[6]-x[4]) + (x[8]-x[2]) + (x[5]-x[3])),
 | |
| 
 | |
|         fjyxi = .125 * ((y[7]-y[1]) + (y[6]-y[4]) - (y[8]-y[2]) - (y[5]-y[3])),
 | |
|         fjyet = .125 * ((y[7]-y[1]) - (y[6]-y[4]) + (y[8]-y[2]) - (y[5]-y[3])),
 | |
|         fjyze = .125 * ((y[7]-y[1]) + (y[6]-y[4]) + (y[8]-y[2]) + (y[5]-y[3])),
 | |
| 
 | |
|         fjzxi = .125 * ((z[7]-z[1]) + (z[6]-z[4]) - (z[8]-z[2]) - (z[5]-z[3])),
 | |
|         fjzet = .125 * ((z[7]-z[1]) - (z[6]-z[4]) + (z[8]-z[2]) - (z[5]-z[3])),
 | |
|         fjzze = .125 * ((z[7]-z[1]) + (z[6]-z[4]) + (z[8]-z[2]) + (z[5]-z[3]));
 | |
| 
 | |
|   /* compute cofactors */
 | |
|   const cjxxi =    (fjyet * fjzze) - (fjzet * fjyze),
 | |
|         cjxet =  - (fjyxi * fjzze) + (fjzxi * fjyze),
 | |
|         cjxze =    (fjyxi * fjzet) - (fjzxi * fjyet),
 | |
| 
 | |
|         cjyxi =  - (fjxet * fjzze) + (fjzet * fjxze),
 | |
|         cjyet =    (fjxxi * fjzze) - (fjzxi * fjxze),
 | |
|         cjyze =  - (fjxxi * fjzet) + (fjzxi * fjxet),
 | |
| 
 | |
|         cjzxi =    (fjxet * fjyze) - (fjyet * fjxze),
 | |
|         cjzet =  - (fjxxi * fjyze) + (fjyxi * fjxze),
 | |
|         cjzze =    (fjxxi * fjyet) - (fjyxi * fjxet);
 | |
| 
 | |
|   /* calculate partials :
 | |
|      this need only be done for l = 0,1,2,3   since , by symmetry ,
 | |
|      (6,7,4,5) = - (0,1,2,3) .
 | |
|   */
 | |
|   b_x[1] =   -  cjxxi  -  cjxet  -  cjxze;
 | |
|   b_x[2] =      cjxxi  -  cjxet  -  cjxze;
 | |
|   b_x[3] =      cjxxi  +  cjxet  -  cjxze;
 | |
|   b_x[4] =   -  cjxxi  +  cjxet  -  cjxze;
 | |
|   b_x[5] = -b_x[3];
 | |
|   b_x[6] = -b_x[4];
 | |
|   b_x[7] = -b_x[1];
 | |
|   b_x[8] = -b_x[2];
 | |
| 
 | |
|   b_y[1] =   -  cjyxi  -  cjyet  -  cjyze;
 | |
|   b_y[2] =      cjyxi  -  cjyet  -  cjyze;
 | |
|   b_y[3] =      cjyxi  +  cjyet  -  cjyze;
 | |
|   b_y[4] =   -  cjyxi  +  cjyet  -  cjyze;
 | |
|   b_y[5] = -b_y[3];
 | |
|   b_y[6] = -b_y[4];
 | |
|   b_y[7] = -b_y[1];
 | |
|   b_y[8] = -b_y[2];
 | |
| 
 | |
|   b_z[1] =   -  cjzxi  -  cjzet  -  cjzze;
 | |
|   b_z[2] =      cjzxi  -  cjzet  -  cjzze;
 | |
|   b_z[3] =      cjzxi  +  cjzet  -  cjzze;
 | |
|   b_z[4] =   -  cjzxi  +  cjzet  -  cjzze;
 | |
|   b_z[5] = -b_z[3];
 | |
|   b_z[6] = -b_z[4];
 | |
|   b_z[7] = -b_z[1];
 | |
|   b_z[8] = -b_z[2];
 | |
| 
 | |
|   /* calculate jacobian determinant (volume) */
 | |
|   volume = 8.0 * ( fjxet * cjxet + fjyet * cjyet + fjzet * cjzet);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcElemNodeNormals(ref pfx: 8*real, ref pfy: 8*real, ref pfz: 8*real, 
 | |
|                          x: 8*real, y: 8*real, z: 8*real) {
 | |
| 
 | |
|   proc ElemFaceNormal(param n1, param n2, param n3, param n4) {
 | |
|     const bisectX0 = 0.5 * (x[n4] + x[n3] - x[n2] - x[n1]),
 | |
|           bisectY0 = 0.5 * (y[n4] + y[n3] - y[n2] - y[n1]),
 | |
|           bisectZ0 = 0.5 * (z[n4] + z[n3] - z[n2] - z[n1]),
 | |
|           bisectX1 = 0.5 * (x[n3] + x[n2] - x[n4] - x[n1]),
 | |
|           bisectY1 = 0.5 * (y[n3] + y[n2] - y[n4] - y[n1]),
 | |
|           bisectZ1 = 0.5 * (z[n3] + z[n2] - z[n4] - z[n1]),
 | |
|           areaX    = 0.25 * (bisectY0 * bisectZ1 - bisectZ0 * bisectY1),
 | |
|           areaY    = 0.25 * (bisectZ0 * bisectX1 - bisectX0 * bisectZ1),
 | |
|           areaZ    = 0.25 * (bisectX0 * bisectY1 - bisectY0 * bisectX1);
 | |
| 
 | |
|     var rx, ry, rz: 8*real; //results
 | |
| 
 | |
|     (rx[n1], rx[n2], rx[n3], rx[n4]) = (areaX, areaX, areaX, areaX);
 | |
|     (ry[n1], ry[n2], ry[n3], ry[n4]) = (areaY, areaY, areaY, areaY);
 | |
|     (rz[n1], rz[n2], rz[n3], rz[n4]) = (areaZ, areaZ, areaZ, areaZ);
 | |
| 
 | |
|     return (rx, ry, rz);
 | |
|   }
 | |
| 
 | |
|   // calculate total normal from each face (faces are made up of
 | |
|   // combinations of nodes)
 | |
| 
 | |
|   (pfx, pfy, pfz) = ElemFaceNormal(1,2,3,4) + ElemFaceNormal(1,5,6,2) +
 | |
|                     ElemFaceNormal(2,6,7,3) + ElemFaceNormal(3,7,8,4) +
 | |
|                     ElemFaceNormal(4,8,5,1) + ElemFaceNormal(5,8,7,6);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc SumElemStressesToNodeForces(b_x: 8*real, b_y: 8*real, b_z: 8*real, 
 | |
|                                  stress_xx:real,
 | |
|                                  stress_yy:real,
 | |
|                                  stress_zz: real, 
 | |
|                                  ref fx: 8*real,
 | |
|                                  ref fy: 8*real,
 | |
|                                  ref fz: 8*real) {
 | |
|   for i in 1..8 {
 | |
|     fx[i] = -(stress_xx * b_x[i]);
 | |
|     fy[i] = -(stress_yy * b_y[i]);
 | |
|     fz[i] = -(stress_zz * b_z[i]);
 | |
|   }
 | |
| }
 | |
| 
 | |
| proc CalcElemVolumeDerivative(x: 8*real, y: 8*real, z: 8*real) {
 | |
| 
 | |
|   proc VoluDer(param n0, param n1, param n2, param n3, param n4, param n5) {
 | |
|     const ox =   (y[n1] + y[n2]) * (z[n0] + z[n1]) 
 | |
|                - (y[n0] + y[n1]) * (z[n1] + z[n2])
 | |
|                + (y[n0] + y[n4]) * (z[n3] + z[n4]) 
 | |
|                - (y[n3] + y[n4]) * (z[n0] + z[n4])
 | |
|                - (y[n2] + y[n5]) * (z[n3] + z[n5])
 | |
|                + (y[n3] + y[n5]) * (z[n2] + z[n5]),
 | |
|           oy = - (x[n1] + x[n2]) * (z[n0] + z[n1])
 | |
|                + (x[n0] + x[n1]) * (z[n1] + z[n2])
 | |
|                - (x[n0] + x[n4]) * (z[n3] + z[n4])
 | |
|                + (x[n3] + x[n4]) * (z[n0] + z[n4])
 | |
|                + (x[n2] + x[n5]) * (z[n3] + z[n5])
 | |
|                - (x[n3] + x[n5]) * (z[n2] + z[n5]),
 | |
|           oz = - (y[n1] + y[n2]) * (x[n0] + x[n1])
 | |
|                + (y[n0] + y[n1]) * (x[n1] + x[n2])
 | |
|                - (y[n0] + y[n4]) * (x[n3] + x[n4])
 | |
|                + (y[n3] + y[n4]) * (x[n0] + x[n4])
 | |
|                + (y[n2] + y[n5]) * (x[n3] + x[n5])
 | |
|                - (y[n3] + y[n5]) * (x[n2] + x[n5]);
 | |
| 
 | |
|     return (ox/12.0, oy/12.0, oz/12.0);
 | |
|   }
 | |
| 
 | |
|   var dvdx, dvdy, dvdz: 8*real;
 | |
| 
 | |
|   (dvdx[1], dvdy[1], dvdz[1]) = VoluDer(2,3,4,5,6,8);
 | |
|   (dvdx[4], dvdy[4], dvdz[4]) = VoluDer(1,2,3,8,5,7);
 | |
|   (dvdx[3], dvdy[3], dvdz[3]) = VoluDer(4,1,2,7,8,6);
 | |
|   (dvdx[2], dvdy[2], dvdz[2]) = VoluDer(3,4,1,6,7,5);
 | |
|   (dvdx[5], dvdy[5], dvdz[5]) = VoluDer(8,7,6,1,4,2);
 | |
|   (dvdx[6], dvdy[6], dvdz[6]) = VoluDer(5,8,7,2,1,3);
 | |
|   (dvdx[7], dvdy[7], dvdz[7]) = VoluDer(6,5,8,3,2,4);
 | |
|   (dvdx[8], dvdy[8], dvdz[8]) = VoluDer(7,6,5,4,3,1);
 | |
| 
 | |
|   return (dvdx, dvdy, dvdz);
 | |
| }
 | |
| 
 | |
| inline proc CalcElemFBHourglassForce(xd: 8*real, yd: 8*real, zd: 8*real,
 | |
|                                      hourgam: 8*(4*real),
 | |
|                                      coefficient: real,
 | |
|                                      ref hgfx: 8*real,
 | |
|                                      ref hgfy: 8*real,
 | |
|                                      ref hgfz: 8*real) {
 | |
|   var hx, hy, hz: 4*real;
 | |
| 
 | |
|   // reduction
 | |
|   for i in 1..4 {
 | |
|     for j in 1..8 {
 | |
|       hx[i] += hourgam[j][i] * xd[j];
 | |
|       hy[i] += hourgam[j][i] * yd[j];
 | |
|       hz[i] += hourgam[j][i] * zd[j];
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   for i in 1..8 {
 | |
|     var shx, shy, shz: real;
 | |
|     for j in 1..4 {
 | |
|       shx += hourgam[i][j] * hx[j];
 | |
|       shy += hourgam[i][j] * hy[j];
 | |
|       shz += hourgam[i][j] * hz[j];
 | |
|     }
 | |
|     hgfx[i] = coefficient * shx;
 | |
|     hgfy[i] = coefficient * shy;
 | |
|     hgfz[i] = coefficient * shz;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcElemCharacteristicLength(x, y, z, volume) {
 | |
|   proc AreaFace(param p0, param p1, param p2, param p3) {
 | |
|     const fx = (x[p2] - x[p0]) - (x[p3] - x[p1]),
 | |
|           fy = (y[p2] - y[p0]) - (y[p3] - y[p1]),
 | |
|           fz = (z[p2] - z[p0]) - (z[p3] - z[p1]),
 | |
|           gx = (x[p2] - x[p0]) + (x[p3] - x[p1]),
 | |
|           gy = (y[p2] - y[p0]) + (y[p3] - y[p1]),
 | |
|           gz = (z[p2] - z[p0]) + (z[p3] - z[p1]),
 | |
|           area = (fx * fx + fy * fy + fz * fz) *
 | |
|                  (gx * gx + gy * gy + gz * gz) -
 | |
|                  (fx * gx + fy * gy + fz * gz) *
 | |
|                  (fx * gx + fy * gy + fz * gz);
 | |
| 
 | |
|     return area ;
 | |
|   }
 | |
| 
 | |
|   const charLength = max(AreaFace(1, 2, 3, 4),
 | |
|                          AreaFace(5, 6, 7, 8),
 | |
|                          AreaFace(1, 2, 6, 5),
 | |
|                          AreaFace(2, 3, 7, 6),
 | |
|                          AreaFace(3, 4, 8, 7),
 | |
|                          AreaFace(4, 1, 5, 8));
 | |
| 
 | |
|   return 4.0 * volume / sqrt(charLength);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcElemVelocityGradient(xvel, yvel, zvel, pfx,  pfy, pfz,
 | |
|                               detJ, ref d: 6*real) {
 | |
|   const inv_detJ = 1.0 / detJ;
 | |
| 
 | |
|   d[1] = inv_detJ * ( pfx[1] * (xvel[1]-xvel[7])
 | |
|                     + pfx[2] * (xvel[2]-xvel[8])
 | |
|                     + pfx[3] * (xvel[3]-xvel[5])
 | |
|                     + pfx[4] * (xvel[4]-xvel[6]) );
 | |
|   d[2] = inv_detJ * ( pfy[1] * (yvel[1]-yvel[7])
 | |
|                     + pfy[2] * (yvel[2]-yvel[8])
 | |
|                     + pfy[3] * (yvel[3]-yvel[5])
 | |
|                     + pfy[4] * (yvel[4]-yvel[6]) );
 | |
|   d[3] = inv_detJ * ( pfz[1] * (zvel[1]-zvel[7])
 | |
|                     + pfz[2] * (zvel[2]-zvel[8])
 | |
|                     + pfz[3] * (zvel[3]-zvel[5])
 | |
|                     + pfz[4] * (zvel[4]-zvel[6]) );
 | |
| 
 | |
|   const dyddx  = inv_detJ * ( pfx[1] * (yvel[1]-yvel[7])
 | |
|                             + pfx[2] * (yvel[2]-yvel[8])
 | |
|                             + pfx[3] * (yvel[3]-yvel[5])
 | |
|                             + pfx[4] * (yvel[4]-yvel[6]) ),
 | |
| 
 | |
|         dxddy  = inv_detJ * ( pfy[1] * (xvel[1]-xvel[7])
 | |
|                             + pfy[2] * (xvel[2]-xvel[8])
 | |
|                             + pfy[3] * (xvel[3]-xvel[5])
 | |
|                             + pfy[4] * (xvel[4]-xvel[6]) ),
 | |
| 
 | |
|         dzddx  = inv_detJ * ( pfx[1] * (zvel[1]-zvel[7])
 | |
|                             + pfx[2] * (zvel[2]-zvel[8])
 | |
|                             + pfx[3] * (zvel[3]-zvel[5])
 | |
|                             + pfx[4] * (zvel[4]-zvel[6]) ),
 | |
| 
 | |
|         dxddz  = inv_detJ * ( pfz[1] * (xvel[1]-xvel[7])
 | |
|                             + pfz[2] * (xvel[2]-xvel[8])
 | |
|                             + pfz[3] * (xvel[3]-xvel[5])
 | |
|                             + pfz[4] * (xvel[4]-xvel[6]) ),
 | |
| 
 | |
|         dzddy  = inv_detJ * ( pfy[1] * (zvel[1]-zvel[7])
 | |
|                             + pfy[2] * (zvel[2]-zvel[8])
 | |
|                             + pfy[3] * (zvel[3]-zvel[5])
 | |
|                             + pfy[4] * (zvel[4]-zvel[6]) ),
 | |
| 
 | |
|         dyddz  = inv_detJ * ( pfz[1] * (yvel[1]-yvel[7])
 | |
|                             + pfz[2] * (yvel[2]-yvel[8])
 | |
|                             + pfz[3] * (yvel[3]-yvel[5])
 | |
|                             + pfz[4] * (yvel[4]-yvel[6]) );
 | |
| 
 | |
|   d[6]  = 0.5 * ( dxddy + dyddx );
 | |
|   d[5]  = 0.5 * ( dxddz + dzddx );
 | |
|   d[4]  = 0.5 * ( dzddy + dyddz );
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcPressureForElems(p_new: [?D] real, bvc, pbvc, 
 | |
|                           e_old, compression, vnewc,
 | |
|                           pmin: real, p_cut: real, eosvmax: real) {
 | |
| 
 | |
|   //
 | |
|   // TODO: Uncomment local once sparse domain is distributed
 | |
|   //
 | |
|   forall i in D /* do local */ {
 | |
|     const c1s = 2.0 / 3.0;
 | |
|     bvc[i] = c1s * (compression[i] + 1.0);
 | |
|     pbvc[i] = c1s;
 | |
|   }
 | |
| 
 | |
|   forall i in D {
 | |
|     p_new[i] = bvc[i] * e_old[i];
 | |
| 
 | |
|     if abs(p_new[i]) < p_cut then p_new[i] = 0.0;
 | |
|     if vnewc[i] >= eosvmax then p_new[i] = 0.0; //impossible?
 | |
|     if p_new[i] < pmin then p_new[i] = pmin;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc TimeIncrement() {
 | |
|   var targetdt = stoptime - time;
 | |
| 
 | |
|   if dtfixed <= 0.0 && cycle != 0 { //don't do this the first cycle
 | |
|     var olddt = deltatime,
 | |
|         newdt = 1.0e20;
 | |
| 
 | |
|     if dtcourant < newdt then newdt = dtcourant / 2.0;
 | |
|     if dthydro < newdt then   newdt = 2.0/3.0 * dthydro;
 | |
| 
 | |
|     const ratio = newdt / olddt;
 | |
|     if ratio >= 1.0 {
 | |
|       if ratio < deltatimemultlb then      newdt = olddt;
 | |
|       else if ratio > deltatimemultub then newdt = olddt * deltatimemultub;
 | |
|     }
 | |
|     if newdt > dtmax then newdt = dtmax;
 | |
| 
 | |
|     deltatime = newdt;
 | |
|   }
 | |
| 
 | |
|   /* TRY TO PREVENT VERY SMALL SCALING ON THE NEXT CYCLE */
 | |
|   if targetdt > deltatime && targetdt < (4.0/3.0 * deltatime) {
 | |
|     targetdt = 2.0/3.0 * deltatime;
 | |
|   }
 | |
|   if targetdt < deltatime then deltatime = targetdt;
 | |
| 
 | |
|   time += deltatime;
 | |
|   cycle += 1;
 | |
| }
 | |
| 
 | |
| inline proc LagrangeLeapFrog() {
 | |
|   /* calculate nodal forces, accelerations, velocities, positions, with
 | |
|    * applied boundary conditions and slide surface considerations */
 | |
|   LagrangeNodal();
 | |
| 
 | |
|   /* calculate element quantities (i.e. velocity gradient & q), and update
 | |
|    * material states */
 | |
|   LagrangeElements();
 | |
| 
 | |
|   CalcTimeConstraintsForElems();
 | |
| }
 | |
| 
 | |
| 
 | |
| inline proc LagrangeNodal() {
 | |
|   CalcForceForNodes();
 | |
| 
 | |
|   CalcAccelerationForNodes();
 | |
| 
 | |
|   ApplyAccelerationBoundaryConditionsForNodes();
 | |
| 
 | |
|   CalcVelocityForNodes(deltatime, u_cut);
 | |
| 
 | |
|   CalcPositionForNodes(deltatime);
 | |
| }
 | |
| 
 | |
| 
 | |
| inline proc LagrangeElements() {
 | |
|   CalcLagrangeElements();
 | |
| 
 | |
|   /* Calculate Q.  (Monotonic q option requires communication) */
 | |
|   CalcQForElems();
 | |
| 
 | |
|   ApplyMaterialPropertiesForElems();
 | |
| 
 | |
|   UpdateVolumesForElems();
 | |
| }
 | |
| 
 | |
| 
 | |
| inline proc CalcTimeConstraintsForElems() {
 | |
|   /* evaluate time constraint */
 | |
|   CalcCourantConstraintForElems();
 | |
| 
 | |
|   /* check hydro constraint */
 | |
|   CalcHydroConstraintForElems();
 | |
| }
 | |
| 
 | |
| 
 | |
| inline proc computeDTF(indx) {
 | |
|   const myvdov = vdov[indx];
 | |
| 
 | |
|   if myvdov == 0.0 then
 | |
|     return max(real);
 | |
| 
 | |
|   const myarealg = arealg[indx];
 | |
|   var dtf = ss[indx]**2;
 | |
|   if myvdov < 0.0 then
 | |
|     dtf += qqc2 * myarealg**2 * myvdov**2;
 | |
|   dtf = sqrt(dtf);
 | |
|   dtf = myarealg / dtf;
 | |
| 
 | |
|   return dtf;
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcCourantConstraintForElems() {
 | |
|   const val = min reduce [indx in MatElems] computeDTF(indx);
 | |
| 
 | |
|   if (val != max(real)) then
 | |
|     dtcourant = val;
 | |
| }
 | |
| 
 | |
| 
 | |
| inline proc calcDtHydroTmp(indx) {
 | |
|   const myvdov = vdov[indx];
 | |
|   if (myvdov == 0.0) then
 | |
|     return max(real);
 | |
|   else
 | |
|     return dvovmax / (abs(myvdov)+1.0e-20);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcHydroConstraintForElems() {
 | |
|   const val = min reduce [indx in MatElems] calcDtHydroTmp(indx);
 | |
| 
 | |
|   if (val != max(real)) then
 | |
|     dthydro = val;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* calculate nodal forces, accelerations, velocities, positions, with
 | |
|  * applied boundary conditions and slide surface considerations */
 | |
| 
 | |
| proc CalcForceForNodes() {
 | |
|   //zero out all forces
 | |
|   forall x in fx do x.write(0);
 | |
|   forall y in fy do y.write(0);
 | |
|   forall z in fz do z.write(0);
 | |
| 
 | |
|   /* Calcforce calls partial, force, hourq */
 | |
|   CalcVolumeForceForElems();
 | |
| 
 | |
|   /* Calculate Nodal Forces at domain boundaries */
 | |
|   // this was commented out in C++ code, so left out here
 | |
| }
 | |
| 
 | |
| proc CalcVolumeForceForElems() {
 | |
|   var sigxx, sigyy, sigzz, determ: [Elems] real;
 | |
| 
 | |
|   /* Sum contributions to total stress tensor */
 | |
|   InitStressTermsForElems(p, q, sigxx, sigyy, sigzz);
 | |
| 
 | |
|   /* call elemlib stress integration loop to produce nodal forces from
 | |
|      material stresses. */
 | |
|   IntegrateStressForElems(sigxx, sigyy, sigzz, determ);
 | |
| 
 | |
|   /* check for negative element volume */
 | |
|   forall e in Elems {
 | |
|     if determ[e] <= 0.0 then
 | |
|       halt("can't have negative volume (determ[", e, "]=", determ[e], ")");
 | |
|   }
 | |
| 
 | |
|   CalcHourglassControlForElems(determ);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc IntegrateStressForElems(sigxx, sigyy, sigzz, determ) {
 | |
|   forall k in Elems {
 | |
|     var b_x, b_y, b_z: 8*real;
 | |
|     var x_local, y_local, z_local: 8*real;
 | |
|     localizeNeighborNodes(k, x, x_local, y, y_local, z, z_local);
 | |
| 
 | |
|     var fx_local, fy_local, fz_local: 8*real;
 | |
| 
 | |
|     local {
 | |
|       /* Volume calculation involves extra work for numerical consistency. */
 | |
|       CalcElemShapeFunctionDerivatives(x_local, y_local, z_local, 
 | |
|                                        b_x, b_y, b_z, determ[k]);
 | |
|     
 | |
|       CalcElemNodeNormals(b_x, b_y, b_z, x_local, y_local, z_local);
 | |
| 
 | |
|       SumElemStressesToNodeForces(b_x, b_y, b_z, sigxx[k], sigyy[k], sigzz[k], 
 | |
|                                   fx_local, fy_local, fz_local);
 | |
|     }
 | |
| 
 | |
|     for (noi, t) in elemToNodesTuple(k) {
 | |
|       fx[noi].add(fx_local[t]);
 | |
|       fy[noi].add(fy_local[t]);
 | |
|       fz[noi].add(fz_local[t]);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcHourglassControlForElems(determ) {
 | |
|   var dvdx, dvdy, dvdz, x8n, y8n, z8n: [Elems] 8*real;
 | |
| 
 | |
|   forall eli in Elems {
 | |
|     /* Collect domain nodes to elem nodes */
 | |
|     var x1, y1, z1: 8*real;
 | |
|     localizeNeighborNodes(eli, x, x1, y, y1, z, z1);
 | |
|     var pfx, pfy, pfz: 8*real;
 | |
| 
 | |
|     local {
 | |
|       /* load into temporary storage for FB Hour Glass control */
 | |
|       (dvdx[eli], dvdy[eli], dvdz[eli]) = CalcElemVolumeDerivative(x1, y1, z1);
 | |
|     }
 | |
| 
 | |
|     x8n[eli]  = x1;
 | |
|     y8n[eli]  = y1;
 | |
|     z8n[eli]  = z1;
 | |
| 
 | |
|     determ[eli] = volo[eli] * v[eli];
 | |
|   }
 | |
| 
 | |
|   /* Do a check for negative volumes */
 | |
|   forall e in Elems {
 | |
|     if v[e] <= 0.0 then
 | |
|       halt("can't have negative (or zero) volume. (in Hourglass, v[", e, "]=", v[e], ")");
 | |
|   }
 | |
| 
 | |
|   if hgcoef > 0.0 {
 | |
|     CalcFBHourglassForceForElems(determ, x8n, y8n, z8n, dvdx, dvdy, dvdz);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| const gammaCoef: 4*(8*real) = // WAS: [1..4, 1..8] real = 
 | |
|                 (( 1.0,  1.0, -1.0, -1.0, -1.0, -1.0,  1.0,  1.0),
 | |
|                  ( 1.0, -1.0, -1.0,  1.0, -1.0,  1.0,  1.0, -1.0),
 | |
|                  ( 1.0, -1.0,  1.0, -1.0,  1.0, -1.0,  1.0, -1.0),
 | |
|                  (-1.0,  1.0, -1.0,  1.0,  1.0, -1.0,  1.0, -1.0));
 | |
| 
 | |
| /* Calculates the Flanagan-Belytschko anti-hourglass force. */
 | |
| proc CalcFBHourglassForceForElems(determ, x8n, y8n, z8n, dvdx, dvdy, dvdz) {
 | |
| 
 | |
|   /* compute the hourglass modes */
 | |
|   forall eli in Elems {
 | |
|     var hourgam: 8*(4*real);
 | |
|     const volinv = 1.0 / determ[eli];
 | |
|     var ss1, mass1, volume13: real;
 | |
|     var hgfx, hgfy, hgfz: 8*real;
 | |
|     var coefficient: real;
 | |
| 
 | |
|     var xd1, yd1, zd1: 8*real;
 | |
|     localizeNeighborNodes(eli, xd, xd1, yd, yd1, zd, zd1);
 | |
| 
 | |
|     /* TODO: Can we enable this local block? */
 | |
|     // local {
 | |
|       for i in 1..4 {
 | |
|         var hourmodx, hourmody, hourmodz: real;
 | |
|         // reduction
 | |
|         for j in 1..8 {
 | |
|           hourmodx += x8n[eli][j] * gammaCoef[i][j];
 | |
|           hourmody += y8n[eli][j] * gammaCoef[i][j];
 | |
|           hourmodz += z8n[eli][j] * gammaCoef[i][j];
 | |
|         }
 | |
| 
 | |
|         for j in 1..8 {
 | |
|           hourgam[j][i] = gammaCoef[i][j] - volinv * 
 | |
|             (dvdx[eli][j] * hourmodx +
 | |
|              dvdy[eli][j] * hourmody +
 | |
|              dvdz[eli][j] * hourmodz);
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       /* compute forces */
 | |
|       /* store forces into h arrays (force arrays) */
 | |
|       ss1 = ss[eli];
 | |
|       mass1 = elemMass[eli];
 | |
|       volume13 = cbrt(determ[eli]);
 | |
| 
 | |
|       coefficient = - hgcoef * 0.01 * ss1 * mass1 / volume13;
 | |
| 
 | |
|       CalcElemFBHourglassForce(xd1, yd1, zd1, hourgam, coefficient,
 | |
|                                hgfx, hgfy, hgfz);
 | |
|       // } // end local
 | |
| 
 | |
|     for (noi,i) in elemToNodesTuple(eli) {
 | |
|       fx[noi].add(hgfx[i]);
 | |
|       fy[noi].add(hgfy[i]);
 | |
|       fz[noi].add(hgfz[i]);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcAccelerationForNodes() {
 | |
|   forall noi in Nodes do local {
 | |
|       xdd[noi] = fx[noi].read() / nodalMass[noi];
 | |
|       ydd[noi] = fy[noi].read() / nodalMass[noi];
 | |
|       zdd[noi] = fz[noi].read() / nodalMass[noi];
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc ApplyAccelerationBoundaryConditionsForNodes() {
 | |
|   // TODO: Shouldn't we be able to write these as follows?
 | |
|   //
 | |
|   // xdd[XSym] = 0.0;
 | |
|   // ydd[YSym] = 0.0;
 | |
|   // zdd[ZSym] = 0.0;
 | |
|   
 | |
|   forall x in XSym do xdd[x] = 0.0;
 | |
|   forall y in YSym do ydd[y] = 0.0;
 | |
|   forall z in ZSym do zdd[z] = 0.0;
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcVelocityForNodes(dt: real, u_cut: real) {
 | |
|   forall i in Nodes do local {
 | |
|     var xdtmp = xd[i] + xdd[i] * dt,
 | |
|         ydtmp = yd[i] + ydd[i] * dt,
 | |
|         zdtmp = zd[i] + zdd[i] * dt;
 | |
|     if abs(xdtmp) < u_cut then xdtmp = 0.0;
 | |
|     if abs(ydtmp) < u_cut then ydtmp = 0.0;
 | |
|     if abs(zdtmp) < u_cut then zdtmp = 0.0;
 | |
|     xd[i] = xdtmp;
 | |
|     yd[i] = ydtmp;
 | |
|     zd[i] = zdtmp;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcPositionForNodes(dt: real) {
 | |
|   forall ijk in Nodes {
 | |
|     x[ijk] += xd[ijk] * dt;
 | |
|     y[ijk] += yd[ijk] * dt;
 | |
|     z[ijk] += zd[ijk] * dt;
 | |
|   }
 | |
| }
 | |
| 
 | |
| // sungeun: Temporary array reused throughout
 | |
| proc CalcLagrangeElements() {
 | |
|   var dxx, dyy, dzz: [Elems] real;
 | |
| 
 | |
|   CalcKinematicsForElems(dxx, dyy, dzz, deltatime);
 | |
| 
 | |
|   // element loop to do some stuff not included in the elemlib function.
 | |
|   forall k in Elems do local {
 | |
|     vdov[k] = dxx[k] + dyy[k] + dzz[k];
 | |
|     var vdovthird = vdov[k] / 3.0;
 | |
|     dxx[k] -= vdovthird;
 | |
|     dyy[k] -= vdovthird;
 | |
|     dzz[k] -= vdovthird;
 | |
|   }
 | |
| 
 | |
|   // See if any volumes are negative, and take appropriate action.
 | |
|   forall e in Elems {
 | |
|     if vnew[e] <= 0.0 then
 | |
|       halt("can't have negative volume (vnew[", e, "]=", vnew[e], ")");
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcKinematicsForElems(dxx, dyy, dzz, const dt: real) {
 | |
|   // loop over all elements
 | |
|   forall k in Elems {
 | |
|     var b_x, b_y, b_z: 8*real,
 | |
|         d: 6*real,
 | |
|         detJ: real;
 | |
| 
 | |
|     //get nodal coordinates from global arrays and copy into local arrays
 | |
|     var x_local, y_local, z_local: 8*real;
 | |
|     localizeNeighborNodes(k, x, x_local, y, y_local, z, z_local);
 | |
| 
 | |
|     //get nodal velocities from global arrays and copy into local arrays
 | |
|     var xd_local, yd_local, zd_local: 8*real;
 | |
|     localizeNeighborNodes(k, xd, xd_local, yd, yd_local, zd, zd_local);
 | |
|     var dt2 = 0.5 * dt; //wish this was local, too...
 | |
| 
 | |
|     local {
 | |
|       //volume calculations
 | |
|       const volume = CalcElemVolume(x_local, y_local, z_local);
 | |
|       const relativeVolume = volume / volo[k];
 | |
|       vnew[k] = relativeVolume;
 | |
|       delv[k] = relativeVolume - v[k];
 | |
| 
 | |
|       //set characteristic length
 | |
|       arealg[k] = CalcElemCharacteristicLength(x_local, y_local, z_local,
 | |
|                                                volume);
 | |
| 
 | |
|       for i in 1..8 {
 | |
|         x_local[i] -= dt2 * xd_local[i];
 | |
|         y_local[i] -= dt2 * yd_local[i];
 | |
|         z_local[i] -= dt2 * zd_local[i];
 | |
|       }
 | |
| 
 | |
|       CalcElemShapeFunctionDerivatives(x_local, y_local, z_local,
 | |
|                                        b_x, b_y, b_z, detJ);
 | |
| 
 | |
|       CalcElemVelocityGradient(xd_local, yd_local, zd_local, b_x, b_y, b_z,
 | |
|                                detJ, d);
 | |
| 
 | |
|     }
 | |
| 
 | |
|     // put velocity gradient quantities into their global arrays.
 | |
|     dxx[k] = d[1];
 | |
|     dyy[k] = d[2];
 | |
|     dzz[k] = d[3];
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| // sungeun: Temporary array reused throughout
 | |
| /* velocity gradient */
 | |
| var delv_xi, delv_eta, delv_zeta: [Elems] real;
 | |
| /* position gradient */
 | |
| var delx_xi, delx_eta, delx_zeta: [Elems] real;
 | |
| 
 | |
| proc CalcQForElems() {
 | |
|   // MONOTONIC Q option
 | |
| 
 | |
|   /* Calculate velocity gradients */
 | |
|   CalcMonotonicQGradientsForElems(delv_xi, delv_eta, delv_zeta, 
 | |
|                                   delx_xi, delx_eta, delx_zeta);
 | |
| 
 | |
|   /* Transfer veloctiy gradients in the first order elements */
 | |
|   /* problem->commElements->Transfer(CommElements::monoQ) ; */
 | |
|   CalcMonotonicQForElems(delv_xi, delv_eta, delv_zeta,
 | |
|                          delx_xi, delx_eta, delx_zeta);
 | |
| 
 | |
|   /* Don't allow excessive artificial viscosity */
 | |
|   forall e in Elems {
 | |
|     if q[e] > qstop then
 | |
|       halt("Excessive artificial viscosity!  (q[", e, "]=", q[e], ")");
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| // sungeun: Temporary array reused throughout
 | |
| var vnewc: [MatElems] real;
 | |
| 
 | |
| /* Expose all of the variables needed for material evaluation */
 | |
| proc ApplyMaterialPropertiesForElems() {
 | |
| 
 | |
|   forall i in MatElems do vnewc[i] = vnew[i];
 | |
| 
 | |
|   if eosvmin != 0.0 then
 | |
|     [c in vnewc] if c < eosvmin then c = eosvmin;
 | |
| 
 | |
|   if eosvmax != 0.0 then
 | |
|     [c in vnewc] if c > eosvmax then c = eosvmax;
 | |
| 
 | |
| 
 | |
|   // old comment: The following loop should compute min/max reductions;
 | |
|   // currently, race-y
 | |
| 
 | |
|   forall matelm in MatElems {
 | |
|     var vc = v[matelm];
 | |
|     if eosvmin != 0.0 && vc < eosvmin then vc = eosvmin;
 | |
|     if eosvmax != 0.0 && vc > eosvmax then vc = eosvmax;
 | |
|     if vc <= 0.0 {
 | |
|       writeln("Volume error (in ApplyMaterialProperiesForElems).");
 | |
|       exit(1);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   EvalEOSForElems(vnewc);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc UpdateVolumesForElems() {
 | |
|   forall i in Elems do local {
 | |
|     var tmpV = vnew[i];
 | |
|     if abs(tmpV-1.0) < v_cut then tmpV = 1.0;
 | |
|     v[i] = tmpV;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcMonotonicQGradientsForElems(delv_xi, delv_eta, delv_zeta, 
 | |
|                                      delx_xi, delx_eta, delx_zeta) {
 | |
|   forall eli in Elems {
 | |
|     const ptiny = 1.0e-36;
 | |
|     var xl, yl, zl: 8*real;
 | |
|     localizeNeighborNodes(eli, x, xl, y, yl, z, zl);
 | |
|     var xvl, yvl, zvl: 8*real;
 | |
|     localizeNeighborNodes(eli, xd, xvl, yd, yvl, zd, zvl);
 | |
| 
 | |
|     local {
 | |
|       const vol = volo[eli] * vnew[eli],
 | |
|             norm = 1.0 / (vol + ptiny);
 | |
|       var ax, ay, az, dxv, dyv, dzv: real;
 | |
| 
 | |
|       const dxj = -0.25*((xl[1]+xl[2]+xl[6]+xl[5])-(xl[4]+xl[3]+xl[7]+xl[8])),
 | |
|             dyj = -0.25*((yl[1]+yl[2]+yl[6]+yl[5])-(yl[4]+yl[3]+yl[7]+yl[8])),
 | |
|             dzj = -0.25*((zl[1]+zl[2]+zl[6]+zl[5])-(zl[4]+zl[3]+zl[7]+zl[8])),
 | |
|       
 | |
|             dxi =  0.25*((xl[2]+xl[3]+xl[7]+xl[6])-(xl[1]+xl[4]+xl[8]+xl[5])),
 | |
|             dyi =  0.25*((yl[2]+yl[3]+yl[7]+yl[6])-(yl[1]+yl[4]+yl[8]+yl[5])),
 | |
|             dzi =  0.25*((zl[2]+zl[3]+zl[7]+zl[6])-(zl[1]+zl[4]+zl[8]+zl[5])),
 | |
|         
 | |
|             dxk =  0.25*((xl[5]+xl[6]+xl[7]+xl[8])-(xl[1]+xl[2]+xl[3]+xl[4])),
 | |
|             dyk =  0.25*((yl[5]+yl[6]+yl[7]+yl[8])-(yl[1]+yl[2]+yl[3]+yl[4])),
 | |
|             dzk =  0.25*((zl[5]+zl[6]+zl[7]+zl[8])-(zl[1]+zl[2]+zl[3]+zl[4]));
 | |
| 
 | |
|       /* find delvk and delxk ( i cross j ) */
 | |
| 
 | |
|       ax = dyi*dzj - dzi*dyj;
 | |
|       ay = dzi*dxj - dxi*dzj;
 | |
|       az = dxi*dyj - dyi*dxj;
 | |
| 
 | |
|       delx_zeta[eli] = vol / sqrt(ax*ax + ay*ay + az*az + ptiny);
 | |
| 
 | |
|       ax *= norm;
 | |
|       ay *= norm;
 | |
|       az *= norm;
 | |
| 
 | |
|       dxv = 0.25*((xvl[5]+xvl[6]+xvl[7]+xvl[8])-(xvl[1]+xvl[2]+xvl[3]+xvl[4]));
 | |
|       dyv = 0.25*((yvl[5]+yvl[6]+yvl[7]+yvl[8])-(yvl[1]+yvl[2]+yvl[3]+yvl[4]));
 | |
|       dzv = 0.25*((zvl[5]+zvl[6]+zvl[7]+zvl[8])-(zvl[1]+zvl[2]+zvl[3]+zvl[4]));
 | |
| 
 | |
|       delv_zeta[eli] = ax*dxv + ay*dyv + az*dzv;
 | |
| 
 | |
|       /* find delxi and delvi ( j cross k ) */
 | |
| 
 | |
|       ax = dyj*dzk - dzj*dyk;
 | |
|       ay = dzj*dxk - dxj*dzk;
 | |
|       az = dxj*dyk - dyj*dxk;
 | |
| 
 | |
|       delx_xi[eli] = vol / sqrt(ax*ax + ay*ay + az*az + ptiny) ;
 | |
| 
 | |
|       ax *= norm;
 | |
|       ay *= norm;
 | |
|       az *= norm;
 | |
| 
 | |
|       dxv = 0.25*((xvl[2]+xvl[3]+xvl[7]+xvl[6])-(xvl[1]+xvl[4]+xvl[8]+xvl[5]));
 | |
|       dyv = 0.25*((yvl[2]+yvl[3]+yvl[7]+yvl[6])-(yvl[1]+yvl[4]+yvl[8]+yvl[5]));
 | |
|       dzv = 0.25*((zvl[2]+zvl[3]+zvl[7]+zvl[6])-(zvl[1]+zvl[4]+zvl[8]+zvl[5]));
 | |
| 
 | |
|       delv_xi[eli] = ax*dxv + ay*dyv + az*dzv ;
 | |
| 
 | |
|       /* find delxj and delvj ( k cross i ) */
 | |
| 
 | |
|       ax = dyk*dzi - dzk*dyi;
 | |
|       ay = dzk*dxi - dxk*dzi;
 | |
|       az = dxk*dyi - dyk*dxi;
 | |
| 
 | |
|       delx_eta[eli] = vol / sqrt(ax*ax + ay*ay + az*az + ptiny) ;
 | |
| 
 | |
|       ax *= norm;
 | |
|       ay *= norm;
 | |
|       az *= norm;
 | |
| 
 | |
|       dxv= -0.25*((xvl[1]+xvl[2]+xvl[6]+xvl[5])-(xvl[4]+xvl[3]+xvl[7]+xvl[8]));
 | |
|       dyv= -0.25*((yvl[1]+yvl[2]+yvl[6]+yvl[5])-(yvl[4]+yvl[3]+yvl[7]+yvl[8]));
 | |
|       dzv= -0.25*((zvl[1]+zvl[2]+zvl[6]+zvl[5])-(zvl[4]+zvl[3]+zvl[7]+zvl[8]));
 | |
| 
 | |
|       delv_eta[eli] = ax*dxv + ay*dyv + az*dzv ;
 | |
|     } /* local */
 | |
|   } /* forall eli */
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcMonotonicQForElems(delv_xi, delv_eta, delv_zeta, 
 | |
|                             delx_xi, delx_eta, delx_zeta) {
 | |
|   //got rid of call through to "CalcMonotonicQRegionForElems"
 | |
| 
 | |
|   forall i in MatElems {
 | |
|     const ptiny = 1.0e-36;
 | |
|     const bcMask = elemBC[i];
 | |
|     var norm, delvm, delvp: real;
 | |
| 
 | |
|     /* phixi */
 | |
|     norm = 1.0 / (delv_xi[i] + ptiny);
 | |
| 
 | |
|     select bcMask & XI_M {
 | |
|       when 0         do delvm = delv_xi[lxim(i)];
 | |
|       when XI_M_SYMM do delvm = delv_xi[i];
 | |
|       when XI_M_FREE do delvm = 0.0;
 | |
|     }
 | |
|     select bcMask & XI_P {
 | |
|       when 0         do delvp = delv_xi[lxip(i)];
 | |
|       when XI_P_SYMM do delvp = delv_xi[i];
 | |
|       when XI_P_FREE do delvp = 0.0;
 | |
|     }
 | |
| 
 | |
|     delvm *= norm;
 | |
|     delvp *= norm;
 | |
| 
 | |
|     var phixi = 0.5 * (delvm + delvp);
 | |
| 
 | |
|     delvm *= monoq_limiter_mult;
 | |
|     delvp *= monoq_limiter_mult;
 | |
| 
 | |
|     if delvm < phixi           then phixi = delvm;
 | |
|     if delvp < phixi           then phixi = delvp;
 | |
|     if phixi < 0               then phixi = 0.0;
 | |
|     if phixi > monoq_max_slope then phixi = monoq_max_slope;
 | |
| 
 | |
|     /* phieta */
 | |
|     norm = 1.0 / (delv_eta[i] + ptiny);
 | |
| 
 | |
|     select bcMask & ETA_M {
 | |
|       when 0          do delvm = delv_eta[letam[i]];
 | |
|       when ETA_M_SYMM do delvm = delv_eta[i];      
 | |
|       when ETA_M_FREE do delvm = 0.0;      
 | |
|     }
 | |
|     select bcMask & ETA_P {
 | |
|       when 0          do delvp = delv_eta[letap[i]];
 | |
|       when ETA_P_SYMM do delvp = delv_eta[i];      
 | |
|       when ETA_P_FREE do delvp = 0.0;      
 | |
|     }
 | |
| 
 | |
|     delvm = delvm * norm;
 | |
|     delvp = delvp * norm;
 | |
| 
 | |
|     var phieta = 0.5 * (delvm + delvp);
 | |
| 
 | |
|     delvm *= monoq_limiter_mult;
 | |
|     delvp *= monoq_limiter_mult;
 | |
| 
 | |
|     if delvm  < phieta          then phieta = delvm;
 | |
|     if delvp  < phieta          then phieta = delvp;
 | |
|     if phieta < 0.0             then phieta = 0.0;
 | |
|     if phieta > monoq_max_slope then phieta = monoq_max_slope;
 | |
| 
 | |
|     /*  phizeta     */
 | |
|     norm = 1.0 / (delv_zeta[i] + ptiny);
 | |
| 
 | |
|     select bcMask & ZETA_M {
 | |
|       when 0           do delvm = delv_zeta[lzetam[i]];
 | |
|       when ZETA_M_SYMM do delvm = delv_zeta[i];       
 | |
|       when ZETA_M_FREE do delvm = 0.0;        
 | |
|     }
 | |
|     select bcMask & ZETA_P {
 | |
|       when 0           do delvp = delv_zeta[lzetap[i]];
 | |
|       when ZETA_P_SYMM do delvp = delv_zeta[i];       
 | |
|       when ZETA_P_FREE do delvp = 0.0;        
 | |
|     }
 | |
| 
 | |
|     delvm = delvm * norm;
 | |
|     delvp = delvp * norm;
 | |
| 
 | |
|     var phizeta = 0.5 * (delvm + delvp);
 | |
| 
 | |
|     delvm *= monoq_limiter_mult;
 | |
|     delvp *= monoq_limiter_mult;
 | |
| 
 | |
|     if delvm   < phizeta          then phizeta = delvm;
 | |
|     if delvp   < phizeta         then phizeta = delvp;
 | |
|     if phizeta < 0.0                 then phizeta = 0.0;
 | |
|     if phizeta > monoq_max_slope then phizeta = monoq_max_slope;
 | |
| 
 | |
|     /* Remove length scale */
 | |
|     var qlin, qquad: real;
 | |
|     if vdov[i] > 0.0 {
 | |
|       qlin  = 0.0;
 | |
|       qquad = 0.0;
 | |
|     } else {
 | |
|       var delvxxi   = delv_xi[i]   * delx_xi[i],
 | |
|           delvxeta  = delv_eta[i]  * delx_eta[i],
 | |
|           delvxzeta = delv_zeta[i] * delx_zeta[i];
 | |
| 
 | |
|       if delvxxi   > 0.0 then delvxxi   = 0.0;
 | |
|       if delvxeta  > 0.0 then delvxeta  = 0.0;
 | |
|       if delvxzeta > 0.0 then delvxzeta = 0.0;
 | |
| 
 | |
|       const rho = elemMass[i] / (volo[i] * vnew[i]);
 | |
| 
 | |
|       qlin = -qlc_monoq * rho *
 | |
|         ( delvxxi   * (1.0 - phixi) +
 | |
|           delvxeta  * (1.0 - phieta) +
 | |
|           delvxzeta * (1.0 - phizeta));
 | |
| 
 | |
|       qquad = qqc_monoq * rho *
 | |
|         ( delvxxi**2   * (1.0 - phixi**2) +
 | |
|           delvxeta**2  * (1.0 - phieta**2) +
 | |
|           delvxzeta**2 * (1.0 - phizeta**2));
 | |
|     }
 | |
|     qq[i] = qquad;
 | |
|     ql[i] = qlin;
 | |
| 
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc EvalEOSForElems(vnewc) {
 | |
|   const rho0 = refdens;
 | |
| 
 | |
|   var e_old, delvc, p_old, q_old, compression, compHalfStep, 
 | |
|     qq_old, ql_old, work, p_new, e_new, q_new, bvc, pbvc: [Elems] real;
 | |
| 
 | |
|   /* compress data, minimal set */
 | |
|   forall i in MatElems {
 | |
|     e_old[i]  = e[i];
 | |
|     delvc[i]  = delv[i];
 | |
|     p_old[i]  = p[i];
 | |
|     q_old[i]  = q[i];
 | |
|     qq_old[i] = qq[i];
 | |
|     ql_old[i] = ql[i];
 | |
|   }
 | |
| 
 | |
|   //
 | |
|   // TODO: Uncomment local once sparse domain is distributed
 | |
|   //
 | |
|   forall i in Elems /* do local */ {
 | |
|     compression[i] = 1.0 / vnewc[i] - 1.0;
 | |
|     const vchalf = vnewc[i] - delvc[i] * 0.5;
 | |
|     compHalfStep[i] = 1.0 / vchalf - 1.0;
 | |
|   }
 | |
| 
 | |
|   /* Check for v > eosvmax or v < eosvmin */
 | |
|   // (note: I think this was already checked for in calling function!)
 | |
|   if eosvmin != 0.0 {
 | |
|     forall i in Elems {
 | |
|       if vnewc[i] <= eosvmin then compHalfStep[i] = compression[i];
 | |
|     }
 | |
|   }
 | |
|   if eosvmax != 0.0 {
 | |
|     forall i in Elems {
 | |
|       if vnewc[i] >= eosvmax {
 | |
|         p_old[i] = 0.0;
 | |
|         compression[i] = 0.0;
 | |
|         compHalfStep[i] = 0.0;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   CalcEnergyForElems(p_new, e_new, q_new, bvc, pbvc, 
 | |
|                      p_old, e_old, q_old, compression, compHalfStep, 
 | |
|                      vnewc, work, delvc, qq_old, ql_old);
 | |
| 
 | |
|   forall i in MatElems {
 | |
|     p[i] = p_new[i];
 | |
|     e[i] = e_new[i];
 | |
|     q[i] = q_new[i];
 | |
|   }
 | |
| 
 | |
|   CalcSoundSpeedForElems(vnewc, rho0, e_new, p_new, pbvc, bvc);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcEnergyForElems(p_new, e_new, q_new, bvc, pbvc,
 | |
|                         p_old, e_old, q_old, compression, compHalfStep, 
 | |
|                         vnewc, work, delvc, qq_old, ql_old) {
 | |
|   // TODO: might need to move these consts into foralls or global
 | |
|   // Otherwise, they live on Locale0 and everyone else has to do 
 | |
|   // remote reads.  OR: Check if they're remote value forwarded.
 | |
|   const rho0 = refdens; 
 | |
|   const sixth = 1.0 / 6.0;
 | |
| 
 | |
|   var pHalfStep: [MatElems] real;
 | |
| 
 | |
|   forall i in Elems {
 | |
|     e_new[i] = e_old[i] - 0.5 * delvc[i] * (p_old[i] + q_old[i]) 
 | |
|                         + 0.5 * work[i];
 | |
|     if e_new[i] < emin then e_new[i] = emin;
 | |
|   }
 | |
| 
 | |
|   CalcPressureForElems(pHalfStep, bvc, pbvc, e_new, compHalfStep,
 | |
|                        vnewc, pmin, p_cut, eosvmax);
 | |
| 
 | |
|   forall i in Elems {
 | |
|     const vhalf = 1.0 / (1.0 + compHalfStep[i]);
 | |
| 
 | |
|     if delvc[i] > 0.0 {
 | |
|       q_new[i] = 0.0;
 | |
|     } else {
 | |
|       var ssc = (pbvc[i] * e_new[i] + vhalf**2 * bvc[i] * pHalfStep[i]) / rho0;
 | |
|       if ssc <= 0.0 then ssc = 0.333333e-36;
 | |
|       else ssc = sqrt(ssc);
 | |
|       q_new[i] = ssc * ql_old[i] + qq_old[i];
 | |
|     }
 | |
| 
 | |
|     e_new[i] += 0.5 * delvc[i]
 | |
|       * (3.0*(p_old[i] + q_old[i]) - 4.0*(pHalfStep[i] + q_new[i]));
 | |
|   }
 | |
|   forall i in Elems {
 | |
|     e_new[i] += 0.5 * work[i];
 | |
|     if abs(e_new[i] < e_cut) then e_new[i] = 0.0;
 | |
|     if e_new[i] < emin then e_new[i] = emin;
 | |
|   }
 | |
| 
 | |
|   CalcPressureForElems(p_new, bvc, pbvc, e_new, compression, vnewc, pmin,
 | |
|                        p_cut, eosvmax);
 | |
| 
 | |
|   forall i in Elems {
 | |
|     var q_tilde:real;
 | |
| 
 | |
|     if delvc[i] > 0.0 {
 | |
|       q_tilde = 0.0;
 | |
|     } else {
 | |
|       var ssc = (pbvc[i] * e_new[i] + vnewc[i]**2 * bvc[i] * p_new[i] ) / rho0;
 | |
|       if ssc <= 0.0 then ssc = 0.333333e-36;
 | |
|       else ssc = sqrt(ssc);
 | |
|       q_tilde = ssc * ql_old[i] + qq_old[i];
 | |
|     }
 | |
| 
 | |
|     e_new[i] -= (7.0*(p_old[i] + q_old[i]) 
 | |
|                  - 8.0*(pHalfStep[i] + q_new[i]) 
 | |
|                  + (p_new[i] + q_tilde)) * delvc[i] * sixth;
 | |
|     if abs(e_new[i]) < e_cut then e_new[i] = 0.0;
 | |
|     if e_new[i] < emin then e_new[i] = emin;
 | |
|   }
 | |
| 
 | |
|   CalcPressureForElems(p_new, bvc, pbvc, e_new, compression, vnewc, pmin,
 | |
|                        p_cut, eosvmax);
 | |
| 
 | |
| 
 | |
|   //
 | |
|   // TODO: Uncomment local once sparse domain is distributed
 | |
|   //
 | |
|   forall i in Elems /* do local */ {
 | |
|     if delvc[i] <= 0.0 {
 | |
|       var ssc = (pbvc[i] * e_new[i] + vnewc[i]**2 * bvc[i] * p_new[i] ) / rho0;
 | |
|       if ssc <= 0.0 then ssc = 0.333333e-36;
 | |
|                     else ssc = sqrt(ssc);
 | |
|       q_new[i] = ssc * ql_old[i] + qq_old[i];
 | |
|       if abs(q_new[i]) < q_cut then q_new[i] = 0.0;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| proc CalcSoundSpeedForElems(vnewc, rho0:real, enewc, pnewc, pbvc, bvc) {
 | |
|   // TODO: Open question: If we had multiple materials, should (a) ss
 | |
|   // be zeroed and accumulated into, and (b) updated atomically to
 | |
|   // avoid losing updates?  (Jeff will go back and think on this)
 | |
|   //
 | |
|   forall i in MatElems {
 | |
|     var ssTmp = (pbvc[i] * enewc[i] + vnewc[i]**2 * bvc[i] * pnewc[i]) / rho0;
 | |
|     if ssTmp <= 1.111111e-36 then ssTmp = 1.111111e-36;
 | |
|     ss[i] = sqrt(ssTmp);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| iter elemToNodes(elem) {
 | |
|   for i in 1..nodesPerElem do
 | |
|     yield elemToNode[elem][i];
 | |
| }
 | |
|                                  
 | |
| iter elemToNodesTuple(e) {
 | |
|   for i in 1..nodesPerElem do
 | |
|     yield (elemToNode[e][i], i);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc deprint(title:string, x:[?D] real, y:[D] real, z:[D] real) {
 | |
|   writeln(title);
 | |
|   for i in D do
 | |
|     writef("%3i: %3.4er %3.4er %3.4er\n", 
 | |
|            if use3DRepresentation then idx3DTo1D(i, D.dim(1).size) else i, 
 | |
|            x[i], y[i], z[i]);
 | |
| }
 | |
| 
 | |
| 
 | |
| proc deprintatomic(title:string, x:[?D] atomic real, y:[] atomic real, z:[] atomic real) {
 | |
|   writeln(title);
 | |
|   for i in D do
 | |
|     writef("%3i: %3.4er %3.4er %3.4er\n", 
 | |
|            if use3DRepresentation then idx3DTo1D(i, D.dim(1).size) else i, 
 | |
|            x[i].peek(), y[i].peek(), z[i].peek());
 | |
| }
 |