diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 32143b7e..af5816a1 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -347,6 +347,14 @@ Ceylon: extensions: - .ceylon +Chapel: + type: programming + color: "#8dc63f" + aliases: + - chpl + extensions: + - .chpl + ChucK: lexer: Java extensions: diff --git a/lib/linguist/samples.json b/lib/linguist/samples.json index f48ee225..979c18ab 100644 --- a/lib/linguist/samples.json +++ b/lib/linguist/samples.json @@ -83,6 +83,9 @@ "Ceylon": [ ".ceylon" ], + "Chapel": [ + ".chpl" + ], "Cirru": [ ".cirru" ], @@ -792,8 +795,8 @@ "exception.zep.php" ] }, - "tokens_total": 650233, - "languages_total": 870, + "tokens_total": 659840, + "languages_total": 875, "tokens": { "ABAP": { "*/**": 1, @@ -16278,6 +16281,798 @@ "<=>": 1, "other.name": 1 }, + "Chapel": { + "//": 150, + "use": 5, + "BlockDist": 2, + "CyclicDist": 1, + "BlockCycDist": 1, + "ReplicatedDist": 2, + ";": 516, + "DimensionalDist2D": 2, + "ReplicatedDim": 2, + "BlockCycDim": 1, + "config": 10, + "const": 59, + "n": 16, + "Space": 12, + "{": 122, + "}": 120, + "BlockSpace": 2, + "dmapped": 8, + "Block": 4, + "(": 626, + "boundingBox": 2, + ")": 626, + "var": 72, + "BA": 3, + "[": 920, + "]": 920, + "int": 21, + "forall": 43, + "ba": 4, + "in": 76, + "do": 62, + "here.id": 7, + "writeln": 53, + "MyLocaleView": 5, + "#numLocales": 1, + "MyLocales": 5, + "locale": 1, + "reshape": 2, + "Locales": 6, + "BlockSpace2": 2, + "targetLocales": 1, + "BA2": 3, + "CyclicSpace": 2, + "Cyclic": 1, + "startIdx": 2, + "Space.low": 5, + "CA": 3, + "ca": 2, + "BlkCycSpace": 2, + "BlockCyclic": 1, + "blocksize": 1, + "BCA": 3, + "bca": 2, + "ReplicatedSpace": 2, + "RA": 11, + "ra": 4, + "RA.numElements": 1, + "A": 13, + "i": 250, + "j": 25, + "i*100": 1, + "+": 334, + "on": 7, + "here": 3, + "LocaleSpace.high": 3, + "nl1": 2, + "nl2": 2, + "if": 98, + "numLocales": 5, + "then": 80, + "else": 16, + "numLocales/2": 1, + "#nl1": 2, + "#nl2": 1, + "#nl1*nl2": 1, + "DimReplicatedBlockcyclicSpace": 2, + "new": 7, + "BlockCyclicDim": 1, + "lowIdx": 1, + "blockSize": 1, + "DRBA": 3, + "for": 36, + "locId1": 2, + "drba": 2, + "Helper": 2, + "print": 5, + "to": 9, + "the": 10, + "console": 1, + "Time": 2, + "get": 3, + "timing": 6, + "routines": 1, + "benchmarking": 1, + "block": 1, + "-": 345, + "distributed": 1, + "arrays": 6, + "luleshInit": 1, + "initialization": 1, + "code": 1, + "data": 1, + "set": 4, + "param": 25, + "useBlockDist": 5, + "CHPL_COMM": 1, + "use3DRepresentation": 4, + "false": 4, + "useSparseMaterials": 3, + "true": 5, + "printWarnings": 3, + "&&": 9, + "luleshInit.filename": 1, + "halt": 5, + "initialEnergy": 2, + "e": 84, + "initial": 1, + "energy": 5, + "value": 1, + "showProgress": 3, + "time": 9, + "and": 4, + "dt": 14, + "values": 1, + "each": 1, + "step": 1, + "debug": 8, + "various": 1, + "info": 1, + "doTiming": 4, + "main": 3, + "timestep": 1, + "loop": 2, + "printCoords": 2, + "final": 1, + "computed": 1, + "coordinates": 2, + "XI_M": 2, + "XI_M_SYMM": 4, + "XI_M_FREE": 3, + "XI_P": 2, + "c": 7, + "XI_P_SYMM": 3, + "XI_P_FREE": 3, + "ETA_M": 2, + "ETA_M_SYMM": 4, + "ETA_M_FREE": 3, + "ETA_P": 2, + "c0": 1, + "ETA_P_SYMM": 3, + "ETA_P_FREE": 3, + "ZETA_M": 2, + "ZETA_M_SYMM": 4, + "ZETA_M_FREE": 3, + "ZETA_P": 2, + "xc00": 1, + "ZETA_P_SYMM": 3, + "ZETA_P_FREE": 3, + "numElems": 2, + "numNodes": 1, + "initProblemSize": 1, + "ElemSpace": 4, + "#elemsPerEdge": 3, + "#numElems": 1, + "NodeSpace": 4, + "#nodesPerEdge": 3, + "#numNodes": 1, + 
"Elems": 45, + "Nodes": 16, + "x": 111, + "y": 107, + "z": 107, + "real": 60, + "nodesPerElem": 6, + "elemToNode": 7, + "nodesPerElem*index": 1, + "lxim": 3, + "lxip": 3, + "letam": 3, + "letap": 3, + "lzetam": 3, + "lzetap": 3, + "index": 4, + "XSym": 4, + "YSym": 4, + "ZSym": 4, + "sparse": 3, + "subdomain": 3, + "u_cut": 5, + "hgcoef": 1, + "qstop": 2, + "monoq_max_slope": 7, + "monoq_limiter_mult": 7, + "e_cut": 3, + "p_cut": 6, + "ss4o3": 1, + "/3.0": 2, + "q_cut": 2, + "v_cut": 2, + "qlc_monoq": 2, + "qqc_monoq": 2, + "qqc": 1, + "qqc2": 1, + "*": 260, + "qqc**2": 1, + "eosvmax": 14, + "eosvmin": 9, + "pmin": 7, + "emin": 7, + "dvovmax": 1, + "refdens": 3, + "deltatimemultlb": 1, + "deltatimemultub": 1, + "dtmax": 1, + "stoptime": 3, + "maxcycles": 3, + "max": 2, + "dtfixed": 2, + "MatElems": 9, + "MatElemsType": 2, + "enumerateMatElems": 2, + "proc": 44, + "type": 1, + "return": 15, + "Elems.type": 1, + "iter": 3, + "yield": 3, + "elemBC": 16, + "p": 10, + "pressure": 1, + "q": 13, + "ql": 3, + "linear": 1, + "term": 2, + "qq": 3, + "quadratic": 1, + "v": 9, + "//relative": 1, + "volume": 21, + "vnew": 9, + "volo": 5, + "reference": 1, + "delv": 3, + "m_vnew": 1, + "m_v": 1, + "vdov": 4, + "derivative": 1, + "over": 2, + "arealg": 2, + "elem": 3, + "characteristic": 2, + "length": 2, + "ss": 2, + "elemMass": 4, + "mass": 12, + "xd": 8, + "yd": 8, + "zd": 8, + "velocities": 2, + "xdd": 3, + "ydd": 3, + "zdd": 3, + "acceleration": 1, + "fx": 8, + "fy": 8, + "fz": 8, + "atomic": 2, + "forces": 1, + "nodalMass": 3, + "current": 1, + "deltatime": 3, + "variable": 1, + "increment": 1, + "dtcourant": 1, + "e20": 2, + "courant": 1, + "constraint": 2, + "dthydro": 1, + "change": 2, + "cycle": 5, + "iteration": 1, + "count": 1, + "simulation": 1, + "initLulesh": 2, + "st": 4, + "getCurrentTime": 4, + "while": 4, + "<": 42, + "iterTime": 2, + "TimeIncrement": 2, + "LagrangeLeapFrog": 1, + "deprint": 3, + "format": 9, + "et": 3, + "/cycle": 1, + "outfile": 1, + "open": 1, + "iomode.cw": 1, + "writer": 1, + "outfile.writer": 1, + "fmtstr": 4, + "writer.writeln": 1, + "writer.close": 1, + "outfile.close": 1, + "initCoordinates": 1, + "initElemToNodeMapping": 1, + "initGreekVars": 1, + "initXSyms": 1, + "initYSyms": 1, + "initZSyms": 1, + "//calculated": 1, + "fly": 1, + "using": 1, + "elemToNodes": 3, + "initMasses": 2, + "octantCorner": 2, + "initBoundaryConditions": 2, + "massAccum": 3, + "eli": 18, + "x_local": 11, + "y_local": 11, + "z_local": 11, + "*real": 40, + "localizeNeighborNodes": 6, + "CalcElemVolume": 3, + "neighbor": 2, + ".add": 1, + ".read": 1, + "/": 26, + "surfaceNode": 8, + "mask": 16, + "<<": 2, + "&": 18, + "f": 4, + "|": 14, + "xf0": 4, + "xcc": 4, + "check": 3, + "loc": 4, + "maxloc": 1, + "reduce": 2, + "zip": 1, + "freeSurface": 3, + "initFreeSurface": 1, + "b": 4, + "inline": 5, + "ref": 19, + "noi": 4, + "TripleProduct": 4, + "x1": 1, + "y1": 1, + "z1": 1, + "x2": 1, + "y2": 1, + "z2": 1, + "x3": 1, + "y3": 1, + "z3": 1, + "x1*": 1, + "y2*z3": 1, + "z2*y3": 1, + "x2*": 1, + "z1*y3": 1, + "y1*z3": 1, + "x3*": 1, + "y1*z2": 1, + "z1*y2": 1, + "dx61": 2, + "dy61": 2, + "dz61": 2, + "dx70": 2, + "dy70": 2, + "dz70": 2, + "dx63": 2, + "dy63": 2, + "dz63": 2, + "dx20": 2, + "dy20": 2, + "dz20": 2, + "dx50": 2, + "dy50": 2, + "dz50": 2, + "dx64": 2, + "dy64": 2, + "dz64": 2, + "dx31": 2, + "dy31": 2, + "dz31": 2, + "dx72": 2, + "dy72": 2, + "dz72": 2, + "dx43": 2, + "dy43": 2, + "dz43": 2, + "dx57": 2, + "dy57": 2, + "dz57": 2, + "dx14": 2, + "dy14": 2, + "dz14": 2, + "dx25": 
2, + "dy25": 2, + "dz25": 2, + "InitStressTermsForElems": 1, + "sigxx": 2, + "sigyy": 2, + "sigzz": 2, + "D": 9, + "CalcElemShapeFunctionDerivatives": 2, + "b_x": 18, + "b_y": 18, + "b_z": 18, + "fjxxi": 5, + ".125": 9, + "fjxet": 6, + "fjxze": 5, + "fjyxi": 5, + "fjyet": 6, + "fjyze": 5, + "fjzxi": 5, + "fjzet": 6, + "fjzze": 5, + "cjxxi": 5, + "cjxet": 6, + "cjxze": 5, + "cjyxi": 5, + "cjyet": 6, + "cjyze": 5, + "cjzxi": 5, + "cjzet": 6, + "cjzze": 5, + "CalcElemNodeNormals": 1, + "pfx": 15, + "pfy": 15, + "pfz": 15, + "ElemFaceNormal": 7, + "n1": 23, + "n2": 23, + "n3": 23, + "n4": 23, + "bisectX0": 3, + "bisectY0": 3, + "bisectZ0": 3, + "bisectX1": 3, + "bisectY1": 3, + "bisectZ1": 3, + "areaX": 5, + "areaY": 5, + "areaZ": 5, + "rx": 6, + "ry": 6, + "rz": 6, + "//results": 1, + "SumElemStressesToNodeForces": 1, + "stress_xx": 2, + "stress_yy": 2, + "stress_zz": 2, + "CalcElemVolumeDerivative": 1, + "VoluDer": 9, + "n0": 13, + "n5": 13, + "ox": 1, + "oy": 1, + "oz": 1, + "ox/12.0": 1, + "oy/12.0": 1, + "oz/12.0": 1, + "dvdx": 10, + "dvdy": 10, + "dvdz": 10, + "CalcElemFBHourglassForce": 1, + "hourgam": 7, + "coefficient": 4, + "hgfx": 2, + "hgfy": 2, + "hgfz": 2, + "hx": 3, + "hy": 3, + "hz": 3, + "shx": 3, + "shy": 3, + "shz": 3, + "CalcElemCharacteristicLength": 2, + "AreaFace": 7, + "p0": 7, + "p1": 7, + "p2": 7, + "p3": 7, + "gx": 5, + "gy": 5, + "gz": 5, + "area": 2, + "charLength": 2, + "sqrt": 10, + "CalcElemVelocityGradient": 2, + "xvel": 25, + "yvel": 25, + "zvel": 25, + "detJ": 5, + "d": 12, + "inv_detJ": 10, + "dyddx": 2, + "dxddy": 2, + "dzddx": 2, + "dxddz": 2, + "dzddy": 2, + "dyddz": 2, + "CalcPressureForElems": 4, + "p_new": 17, + "bvc": 15, + "pbvc": 14, + "e_old": 7, + "compression": 10, + "vnewc": 22, + "c1s": 3, + "abs": 8, + "//impossible": 1, + "targetdt": 1, + "//don": 1, + "t": 3, + "have": 2, + "negative": 2, + "determ": 1, + "can": 2, + "we": 1, + "be": 2, + "able": 1, + "write": 1, + "these": 1, + "as": 1, + "follows": 1, + "CalcVelocityForNodes": 1, + "local": 8, + "xdtmp": 4, + "ydtmp": 4, + "zdtmp": 4, + "CalcPositionForNodes": 1, + "ijk": 7, + "CalcLagrangeElements": 1, + "dxx": 6, + "dyy": 6, + "dzz": 6, + "CalcKinematicsForElems": 2, + "k": 20, + "vdovthird": 4, + "<=>": 7, + "0": 5, + "all": 1, + "elements": 3, + "8": 4, + "6": 1, + "nodal": 2, + "from": 2, + "global": 3, + "copy": 2, + "into": 3, + "xd_local": 4, + "yd_local": 4, + "zd_local": 4, + "dt2": 4, + "5": 1, + "wish": 1, + "this": 2, + "was": 1, + "too": 1, + "calculations": 1, + "relativeVolume": 3, + "1": 2, + "put": 1, + "velocity": 3, + "gradient": 3, + "quantities": 1, + "their": 1, + "2": 1, + "3": 1, + "sungeun": 1, + "Temporary": 1, + "array": 4, + "reused": 1, + "throughout": 1, + "delv_xi": 12, + "delv_eta": 12, + "delv_zeta": 12, + "position": 1, + "delx_xi": 7, + "delx_eta": 7, + "delx_zeta": 7, + "CalcQForElems": 1, + "MONOTONIC": 1, + "Q": 1, + "option": 1, + "Calculate": 1, + "gradients": 2, + "CalcMonotonicQGradientsForElems": 2, + "Transfer": 2, + "veloctiy": 1, + "first": 1, + "order": 1, + "problem": 1, + "commElements": 1, + "CommElements": 1, + "monoQ": 1, + "*/": 1, + "CalcMonotonicQForElems": 2, + "ApplyMaterialPropertiesForElems": 1, + "matelm": 2, + "vc": 6, + "exit": 1, + "EvalEOSForElems": 2, + "UpdateVolumesForElems": 1, + "tmpV": 4, + "ptiny": 9, + "xl": 26, + "yl": 26, + "zl": 26, + "xvl": 26, + "yvl": 26, + "zvl": 26, + "vol": 5, + "norm": 20, + "ax": 7, + "ay": 7, + "az": 7, + "dxv": 4, + "dyv": 4, + "dzv": 4, + "dxj": 1, + "dyj": 1, + "dzj": 1, + "dxi": 1, + 
"dyi": 1, + "dzi": 1, + "dxk": 1, + "dyk": 1, + "dzk": 1, + "dyi*dzj": 1, + "dzi*dyj": 1, + "dzi*dxj": 1, + "dxi*dzj": 1, + "dxi*dyj": 1, + "dyi*dxj": 1, + "ax*ax": 3, + "ay*ay": 3, + "az*az": 3, + "ax*dxv": 3, + "ay*dyv": 3, + "az*dzv": 3, + "dyj*dzk": 1, + "dzj*dyk": 1, + "dzj*dxk": 1, + "dxj*dzk": 1, + "dxj*dyk": 1, + "dyj*dxk": 1, + "dyk*dzi": 1, + "dzk*dyi": 1, + "dzk*dxi": 1, + "dxk*dzi": 1, + "dxk*dyi": 1, + "dyk*dxi": 1, + "//got": 1, + "rid": 1, + "of": 3, + "call": 1, + "through": 1, + "bcMask": 7, + "delvm": 27, + "delvp": 27, + "select": 6, + "when": 18, + "phixi": 10, + "phieta": 10, + "phizeta": 10, + "qlin": 4, + "qquad": 4, + "delvxxi": 4, + "delvxeta": 4, + "delvxzeta": 4, + "rho": 3, + "delvxxi**2": 1, + "phixi**2": 1, + "delvxeta**2": 1, + "phieta**2": 1, + "delvxzeta**2": 1, + "phizeta**2": 1, + "rho0": 8, + "delvc": 11, + "p_old": 8, + "q_old": 7, + "compHalfStep": 8, + "qq_old": 7, + "ql_old": 7, + "work": 5, + "e_new": 25, + "q_new": 11, + "vchalf": 2, + "CalcEnergyForElems": 2, + "CalcSoundSpeedForElems": 2, + "sixth": 2, + "pHalfStep": 5, + "vhalf": 1, + "ssc": 18, + "vhalf**2": 1, + "q_tilde": 4, + "**2": 6, + "enewc": 2, + "pnewc": 2, + "ssTmp": 4, + "elemToNodesTuple": 1, + "title": 2, + "string": 1, + "pi": 1, + "solarMass": 7, + "pi**2": 1, + "daysPerYear": 13, + "record": 1, + "body": 6, + "pos": 5, + "does": 1, + "not": 1, + "after": 1, + "it": 1, + "is": 1, + "up": 1, + "bodies": 8, + "numbodies": 5, + "bodies.numElements": 1, + "initSun": 2, + "writef": 2, + "advance": 2, + "b.v": 2, + "b.mass": 1, + ".v": 1, + "updateVelocities": 2, + "b1": 2, + "b2": 2, + "dpos": 4, + "b1.pos": 2, + "b2.pos": 2, + "mag": 3, + "sumOfSquares": 4, + "**3": 1, + "b1.v": 2, + "b2.mass": 2, + "b2.v": 1, + "b1.mass": 3, + "b.pos": 1, + "Random": 1, + "random": 1, + "number": 1, + "generation": 1, + "Timer": 2, + "class": 1, + "timer": 2, + "sort": 1, + "**15": 1, + "size": 1, + "sorted": 1, + "thresh": 6, + "recursive": 1, + "depth": 1, + "serialize": 1, + "verbose": 7, + "out": 1, + "many": 1, + "bool": 1, + "disable": 1, + "numbers": 1, + "fillRandom": 1, + "timer.start": 1, + "pqsort": 4, + "timer.stop": 1, + "timer.elapsed": 1, + "arr": 32, + "low": 12, + "arr.domain.low": 1, + "high": 14, + "arr.domain.high": 1, + "where": 2, + "arr.rank": 2, + "bubbleSort": 2, + "pivotVal": 9, + "findPivot": 2, + "pivotLoc": 3, + "partition": 2, + "serial": 1, + "cobegin": 1, + "mid": 7, + "ilo": 9, + "ihi": 6, + "low..high": 2 + }, "Cirru": { "print": 38, "array": 14, @@ -70803,6 +71598,7 @@ "COBOL": 90, "CSS": 43867, "Ceylon": 50, + "Chapel": 9607, "Cirru": 244, "Clojure": 510, "CoffeeScript": 2951, @@ -71003,6 +71799,7 @@ "COBOL": 4, "CSS": 2, "Ceylon": 1, + "Chapel": 5, "Cirru": 9, "Clojure": 7, "CoffeeScript": 9, @@ -71179,5 +71976,5 @@ "fish": 3, "wisp": 1 }, - "md5": "b82e97a2c9c123d8e370eeffe64a1a1d" + "md5": "4c7029e118ceca989e36b7f3c025a771" } \ No newline at end of file diff --git a/samples/Chapel/distributions.chpl b/samples/Chapel/distributions.chpl new file mode 100644 index 00000000..a3e5f81d --- /dev/null +++ b/samples/Chapel/distributions.chpl @@ -0,0 +1,304 @@ +// +// Distributions Primer +// +// This primer demonstrates uses of some of Chapel's standard +// distributions. 
To use these distributions in a Chapel program, +// the respective module must be used: +// +use BlockDist, CyclicDist, BlockCycDist, ReplicatedDist; +use DimensionalDist2D, ReplicatedDim, BlockCycDim; + +// +// For each distribution, we'll create a distributed domain and array +// and then initialize it just to give a brief flavor of how the +// distribution maps across locales. Running this example on 6 +// locales does a nice job of illustrating the distribution +// characteristics. +// +// All of these distributions support options to map to a different +// virtual locale grid than the one used by default (a +// multidimensional factoring of the built-in Locales array), as well +// as to control the amount of parallelism used in data parallel +// loops. See the Standard Distributions chapter of the language spec +// for more details. +// + +// +// Make the program size configurable from the command line. +// +config const n = 8; + +// +// Declare a 2-dimensional domain Space that we will later use to +// initialize the distributed domains. +// +const Space = {1..n, 1..n}; + +// +// The Block distribution distributes a bounding box from +// n-dimensional space across the target locale array viewed as an +// n-dimensional virtual locale grid. The bounding box is blocked +// into roughly equal portions across the locales. Note that domains +// declared over a Block distribution can also store indices outside +// of the bounding box; the bounding box is merely used to compute +// the blocking of space. +// +// In this example, we declare a 2-dimensional Block-distributed +// domain BlockSpace and a Block-distributed array BA declared over +// the domain. +// +const BlockSpace = Space dmapped Block(boundingBox=Space); +var BA: [BlockSpace] int; + +// +// To illustrate how the index set is distributed across locales, +// we'll use a forall loop to initialize each array element to the +// locale ID that stores that index/element/iteration. +// +forall ba in BA do + ba = here.id; + +// +// Output the Block-distributed array to visually see how the elements +// are partitioned across the locales. +// +writeln("Block Array Index Map"); +writeln(BA); +writeln(); + +// +// Most of Chapel's standard distributions support an optional +// targetLocales argument that permits you to pass in your own +// array of locales to be targeted. In general, the targetLocales +// argument should match the rank of the distribution. So for +// example, to map a Block to a [numLocales x 1] view of the +// locale set, one could do something like this: + +// +// We start by creating our own array of the locale values. Here +// we use the standard array reshape function for convenience, +// but more generally, this array could be accessed/assigned like any +// other. +// + +var MyLocaleView = {0..#numLocales, 1..1}; +var MyLocales: [MyLocaleView] locale = reshape(Locales, MyLocaleView); + +// +// Then we'll declare a distributed domain/array that targets +// this view of the locales: +// + +const BlockSpace2 = Space dmapped Block(boundingBox=Space, + targetLocales=MyLocales); +var BA2: [BlockSpace2] int; + +// +// Then we'll do a similar computation as before to verify where +// everything ended up: +// +forall ba in BA2 do + ba = here.id; + +writeln("Block Array Index Map"); +writeln(BA2); +writeln(); + + + +// +// Next, we'll perform a similar computation for the Cyclic distribution. 
+// Cyclic distributions start at a designated n-dimensional index and +// distribute the n-dimensional space across an n-dimensional array +// of locales in a round-robin fashion (in each dimension). As with +// the Block distribution, domains declared using the +// distribution may have indices lower than the starting index; that +// value should just be considered a parameterization of how the +// distribution is defined. +// +const CyclicSpace = Space dmapped Cyclic(startIdx=Space.low); +var CA: [CyclicSpace] int; + +forall ca in CA do + ca = here.id; + +writeln("Cyclic Array Index Map"); +writeln(CA); +writeln(); + + +// +// Next, we'll declare a Block-Cyclic distribution. These +// distributions also deal out indices in a round-robin fashion, +// but rather than dealing out singleton indices, they deal out blocks +// of indices. Thus, the BlockCyclic distribution is parameterized +// by a starting index (as with Cyclic) and a block size (per +// dimension) specifying how large the chunks to be dealt out are. +// +const BlkCycSpace = Space dmapped BlockCyclic(startIdx=Space.low, + blocksize=(2, 3)); +var BCA: [BlkCycSpace] int; + +forall bca in BCA do + bca = here.id; + +writeln("Block-Cyclic Array Index Map"); +writeln(BCA); +writeln(); + + +// +// The ReplicatedDist distribution is different: each of the +// original domain's indices - and the corresponding array elements - +// is replicated onto each locale. (Note: consistency among these +// array replicands is NOT maintained automatically.) +// +// This replication is observable in some cases but not others, +// as shown below. Note: this behavior may change in the future. +// +const ReplicatedSpace = Space dmapped ReplicatedDist(); +var RA: [ReplicatedSpace] int; + +// The replication is observable - this visits each replicand. +forall ra in RA do + ra = here.id; + +writeln("Replicated Array Index Map, ", RA.numElements, " elements total"); +writeln(RA); +writeln(); + +// +// The replication is observable when the replicated array is +// on the left-hand side. If the right-hand side is not replicated, +// it is copied into each replicand. +// We illustrate this using a non-distributed array. +// +var A: [Space] int = [(i,j) in Space] i*100 + j; +RA = A; +writeln("Replicated Array after being array-assigned into"); +writeln(RA); +writeln(); + +// +// Analogously, each replicand will be visited and +// other participating expressions will be computed on each locale +// (a) when the replicated array is assigned a scalar: +// RA = 5; +// (b) when it appears first in a zippered forall loop: +// forall (ra, a) in zip(RA, A) do ...; +// (c) when it appears in a for loop: +// for ra in RA do ...; +// +// Zippering (RA,A) or (A,RA) in a 'for' loop will generate +// an error due to their different number of elements. + +// Let RA store the Index Map again, for the examples below. +forall ra in RA do + ra = here.id; + +// +// Only the local replicand is accessed - replication is NOT observable +// and consistency is NOT maintained - when: +// (a) the replicated array is indexed - an individual element is read... +// +on Locales(0) do + writeln("on ", here, ": ", RA(Space.low)); +on Locales(LocaleSpace.high) do + writeln("on ", here, ": ", RA(Space.low)); +writeln(); + +// ...or an individual element is written; +on Locales(LocaleSpace.high) do + RA(Space.low) = 7777; + +writeln("Replicated Array after being indexed into"); +writeln(RA); +writeln(); + +// +// (b) the replicated array is on the right-hand side of an assignment...
+// +on Locales(LocaleSpace.high) do + A = RA + 4; +writeln("Non-Replicated Array after assignment from Replicated Array + 4"); +writeln(A); +writeln(); + +// +// (c) ...or, generally, the replicated array or domain participates +// in a zippered forall loop, but not in the first position. +// The loop could look like: +// +// forall (a, (i,j), ra) in (A, ReplicatedSpace, RA) do ...; +// + + +// +// The DimensionalDist2D distribution lets us build a 2D distribution +// as a composition of specifiers for individual dimensions. +// Under such a "dimensional" distribution each dimension is handled +// independently of the other. +// +// The dimension specifiers are similar to the corresponding multi-dimensional +// distributions in constructor arguments and index-to-locale mapping rules. +// However, instead of an array of locales, a specifier constructor +// accepts just the number of locales that the indices in the corresponding +// dimension will be distributed across. +// +// The DimensionalDist2D constructor requires: +// * an [0..nl1-1, 0..nl2-1] array of locales, where +// nl1 and nl2 are the number of locales in each dimension, and +// * two dimension specifiers, created for nl1 and nl2 locale counts, resp. +// +// Presently, the following dimension specifiers are available +// (shown here with their constructor arguments): +// +// * ReplicatedDim(numLocales) +// * BlockDim(numLocales, boundingBoxLow, boundingBoxHigh) +// * BlockCyclicDim(lowIdx, blockSize, numLocales) +// + +// +// The following example creates a dimensional distribution that +// replicates over 2 locales (when available) in the first dimension +// and distributes using block-cyclic distribution in the second dimension. +// The example computes nl1 and nl2 and reshapes MyLocales correspondingly. +// + +var (nl1, nl2) = if numLocales == 1 then (1, 1) else (2, numLocales/2); +MyLocaleView = {0..#nl1, 0..#nl2}; +MyLocales = reshape(Locales[0..#nl1*nl2], MyLocaleView); + +const DimReplicatedBlockcyclicSpace = Space + dmapped DimensionalDist2D(MyLocales, + new ReplicatedDim(numLocales = nl1), + new BlockCyclicDim(numLocales = nl2, + lowIdx = 1, blockSize = 2)); + +var DRBA: [DimReplicatedBlockcyclicSpace] int; + +// The ReplicatedDim specifier always accesses the local replicand. +// (This differs from how the ReplicatedDist distribution works.) +// +// This example visits each replicand. The behavior is the same +// regardless of the second index into MyLocales below. + +for locId1 in 0..#nl1 do on MyLocales[locId1, 0] { + + forall drba in DRBA do + drba = here.id; + + writeln("Dimensional2D(Replicated,BlockCyclic) Array Index Map", + " from ", here); + + // Technicality: 'writeln(DRBA)' would read DRBA always on Locale 0. + // Since we want to see what DRBA contains on the current locale, + // we use 'Helper' that is mapped using the default distribution. + // 'Helper = DRBA' captures the view of DRBA on the current locale, + // which we then print out. + + const Helper: [Space] int = DRBA; + writeln(Helper); + writeln(); + +} diff --git a/samples/Chapel/hello.chpl b/samples/Chapel/hello.chpl new file mode 100644 index 00000000..cc98e786 --- /dev/null +++ b/samples/Chapel/hello.chpl @@ -0,0 +1 @@ +writeln("Hello, world!"); // print 'Hello, world!'
to the console diff --git a/samples/Chapel/lulesh.chpl b/samples/Chapel/lulesh.chpl new file mode 100644 index 00000000..d6480d26 --- /dev/null +++ b/samples/Chapel/lulesh.chpl @@ -0,0 +1,1692 @@ +/* + Derived from the DARPA/Livermore Unstructured Lagrangian Explicit + Shock Hydrodynamics (LULESH) + https://computation.llnl.gov/casc/ShockHydro/ + + Original port to Chapel by Brandon Holt (8/2011). Further + improvements for the sake of performance and/or generality made by + Sung-Eun Choi (12/2011, 11/2012), Jeff Keasler (3/2012), and Brad + Chamberlain (3-4,9-11/2012, 2/2013). + + + Notes on the Initial Implementation + ----------------------------------- + + This implementation was designed to mirror the overall structure of + the C++ Lulesh but use Chapel constructs where they can help make + the code more readable, easier to maintain, or more + 'elegant'. Function names are preserved for the most part, with some + additional helper functions, and original comments from the C++ code + are interspersed approximately where they belong to give an idea of + how the two codes line up. One major difference for this Chapel + version is the use of a number of module-level variables and + constants. + + + Status: + + This code remains a work-in-progress as we gain further experience + with it. Proposed improvements are noted in the README in this + directory and (in some cases) in TODO comments in the code. + + */ + + + +use Time, // to get timing routines for benchmarking + BlockDist; // for block-distributed arrays + +use luleshInit; // initialization code for data set + +/* The configuration parameters for lulesh. These can be set on the + compiler command line using -s<paramName>=<value>. For example, + + chpl -suseBlockDist=true + + useBlockDist : says whether or not to block-distribute the arrays. + The default is based on the value of CHPL_COMM, as + an indication of whether this is a single- or multi- + locale execution. + + use3DRepresentation : indicates whether the element node arrays + should be stored using a 3D representation + (limiting the execution to cube inputs) or + the more general 1D representation (supporting + arbitrary data sets). + + useSparseMaterials : indicates whether sparse domains/arrays should be + used to represent the materials. Sparse domains + are more realistic in that they permit an arbitrary + subset of the problem space to store a material. + Dense domains are sufficient for LULESH since there's + an assumption that the material spans all cells. + + printWarnings : prints performance-oriented warnings to prevent + surprises.
+*/ + +config param useBlockDist = (CHPL_COMM != "none"), + use3DRepresentation = false, + useSparseMaterials = true, + printWarnings = true; + + +// +// Sanity check to ensure that input files aren't used with the 3D +// representation +// +if (use3DRepresentation && (luleshInit.filename != "")) then + halt("The 3D representation does not support reading input from files"); + + +/* Configuration constants: Override defaults on executable's command-line */ + +config const initialEnergy = 3.948746e+7; // initial energy value + + +config const showProgress = false, // print time and dt values on each step + debug = false, // print various debug info + doTiming = true, // time the main timestep loop + printCoords = true; // print the final computed coordinates + + +/* Compile-time constants */ + +param XI_M = 0x003, + XI_M_SYMM = 0x001, + XI_M_FREE = 0x002, + + XI_P = 0x00c, + XI_P_SYMM = 0x004, + XI_P_FREE = 0x008, + + ETA_M = 0x030, + ETA_M_SYMM = 0x010, + ETA_M_FREE = 0x020, + + ETA_P = 0x0c0, + ETA_P_SYMM = 0x040, + ETA_P_FREE = 0x080, + + ZETA_M = 0x300, + ZETA_M_SYMM = 0x100, + ZETA_M_FREE = 0x200, + + ZETA_P = 0xc00, + ZETA_P_SYMM = 0x400, + ZETA_P_FREE = 0x800; + + +/* Set up the problem size */ + +const (numElems, numNodes) = initProblemSize(); + + +/* Declare abstract problem domains */ + +const ElemSpace = if use3DRepresentation + then {0..#elemsPerEdge, 0..#elemsPerEdge, 0..#elemsPerEdge} + else {0..#numElems}, + NodeSpace = if use3DRepresentation + then {0..#nodesPerEdge, 0..#nodesPerEdge, 0..#nodesPerEdge} + else {0..#numNodes}; + + +/* Declare the (potentially distributed) problem domains */ + +const Elems = if useBlockDist then ElemSpace dmapped Block(ElemSpace) + else ElemSpace, + Nodes = if useBlockDist then NodeSpace dmapped Block(NodeSpace) + else NodeSpace; + + +/* The coordinates */ + +var x, y, z: [Nodes] real; + + +/* The number of nodes per element. 
In a rank-independent version, + this could be written 2**rank */ + +param nodesPerElem = 8; + + +// We could name this, but chose not to since it doesn't add that much clarity +// +// const elemNeighbors = 1..nodesPerElem; + + +/* The element-to-node mapping */ + +var elemToNode: [Elems] nodesPerElem*index(Nodes); + + +/* the Greek variables */ + +var lxim, lxip, letam, letap, lzetam, lzetap: [Elems] index(Elems); + + +/* the X, Y, Z Symmetry values */ + +var XSym, YSym, ZSym: sparse subdomain(Nodes); + + + +/* Constants */ + +const u_cut = 1.0e-7, /* velocity tolerance */ + hgcoef = 3.0, /* hourglass control */ + qstop = 1.0e+12, /* excessive q indicator */ + monoq_max_slope = 1.0, + monoq_limiter_mult = 2.0, + e_cut = 1.0e-7, /* energy tolerance */ + p_cut = 1.0e-7, /* pressure tolerance */ + ss4o3 = 4.0/3.0, + q_cut = 1.0e-7, /* q tolerance */ + v_cut = 1.0e-10, /* relative volume tolerance */ + qlc_monoq = 0.5, /* linear term coef for q */ + qqc_monoq = 2.0/3.0, /* quadratic term coef for q */ + qqc = 2.0, + qqc2 = 64.0 * qqc**2, + eosvmax = 1.0e+9, + eosvmin = 1.0e-9, + pmin = 0.0, /* pressure floor */ + emin = -1.0e+15, /* energy floor */ + dvovmax = 0.1, /* maximum allowable volume change */ + refdens = 1.0, /* reference density */ + + deltatimemultlb = 1.1, + deltatimemultub = 1.2, + dtmax = 1.0e-2; /* maximum allowable time increment */ + + +config const stoptime = 1.0e-2, /* end time for simulation */ + maxcycles = max(int), /* max number of cycles to simulate */ + dtfixed = -1.0e-7; /* fixed time increment */ + + +/* The list of material elements */ + +const MatElems: MatElemsType = if useSparseMaterials then enumerateMatElems() + else Elems; + + +proc MatElemsType type { + if useSparseMaterials { + if (printWarnings && useBlockDist && numLocales > 1) then + writeln("WARNING: The LULESH Material Elements (MatElems) are not yet\n", + " distributed, so result in excessive memory use on,\n", + " and communication with, locale 0\n"); + return sparse subdomain(Elems); + } else + return Elems.type; +} + +iter enumerateMatElems() { + if (printWarnings && useBlockDist && numLocales > 1) then + writeln("WARNING: generation of matrix elements is serial and\n", + " unlikely to scale"); + for i in Elems do + yield i; +} + + +/* Element fields */ + +var elemBC: [Elems] int, + + e: [Elems] real, // energy + p: [Elems] real, // pressure + + q: [Elems] real, // q + ql: [Elems] real, // linear term for q + qq: [Elems] real, // quadratic term for q + + v: [Elems] real = 1.0, //relative volume + vnew: [Elems] real, + + volo: [Elems] real, // reference volume + delv: [Elems] real, // m_vnew - m_v + vdov: [Elems] real, // volume derivative over volume + + arealg: [Elems] real, // elem characteristic length + + ss: [Elems] real, // "sound speed" + + elemMass: [Elems] real; // mass + + +/* Nodal fields */ + +var xd, yd, zd: [Nodes] real, // velocities + + xdd, ydd, zdd: [Nodes] real, // acceleration + + fx, fy, fz: [Nodes] atomic real, // forces + + nodalMass: [Nodes] real; // mass + + +/* Parameters */ + +var time = 0.0, // current time + deltatime = 1.0e-7, // variable time increment + dtcourant = 1.0e20, // courant constraint + dthydro = 1.0e20, // volume change constraint + + cycle = 0; // iteration count for simulation + + +proc main() { + if debug then writeln("Lulesh -- Problem Size = ", numElems); + + initLulesh(); + + var st: real; + if doTiming then st = getCurrentTime(); + while (time < stoptime && cycle < maxcycles) { + const iterTime = if showProgress then getCurrentTime() else 0.0; + + 
TimeIncrement(); + + LagrangeLeapFrog(); + + if debug { + // deprint("[[ Forces ]]", fx, fy, fz); + deprint("[[ Positions ]]", x, y, z); + deprint("[[ p, e, q ]]", p, e, q); + } + if showProgress then + writeln("time = ", format("%e", time), ", dt=", format("%e", deltatime), + if doTiming then ", elapsed = " + (getCurrentTime()-iterTime) + else ""); + } + if (cycle == maxcycles) { + writeln("Stopped early due to reaching maxnumsteps"); + } + if doTiming { + const et = getCurrentTime(); + writeln("Total Time: ", et-st); + writeln("Time/Cycle: ", (et-st)/cycle); + } + writeln("Number of cycles: ", cycle); + + if printCoords { + var outfile = open("coords.out", iomode.cw); + var writer = outfile.writer(); + var fmtstr = if debug then "%1.9e" else "%1.4e"; + for i in Nodes { + writer.writeln(format(fmtstr, x[i]), " ", + format(fmtstr, y[i]), " ", + format(fmtstr, z[i])); + } + writer.close(); + outfile.close(); + } +} + + +/* Initialization functions */ + +proc initLulesh() { + // initialize the coordinates + initCoordinates(x,y,z); + + // initialize the element to node mapping + initElemToNodeMapping(elemToNode); + + // initialize the greek symbols + initGreekVars(lxim, lxip, letam, letap, lzetam, lzetap); + + // initialize the symmetry plane locations + initXSyms(XSym); + initYSyms(YSym); + initZSyms(ZSym); + + /* embed hexehedral elements in nodal point lattice */ + //calculated on the fly using: elemToNodes(i: index(Elems)): index(Nodes) + + // initialize the masses + initMasses(); + + // initialize the boundary conditions + const octantCorner = initBoundaryConditions(); + + // deposit the energy for Sedov Problem + e[octantCorner] = initialEnergy; +} + + +proc initMasses() { + // This is a temporary array used to accumulate masses in parallel + // without losing updates by using 'atomic' variables + var massAccum: [Nodes] atomic real; + + forall eli in Elems { + var x_local, y_local, z_local: 8*real; + localizeNeighborNodes(eli, x, x_local, y, y_local, z, z_local); + + const volume = CalcElemVolume(x_local, y_local, z_local); + volo[eli] = volume; + elemMass[eli] = volume; + + for neighbor in elemToNodes[eli] do + massAccum[neighbor].add(volume); + } + + // When we're done, copy the accumulated masses into nodalMass, at + // which point the massAccum array can go away (and will at the + // procedure's return + + forall i in Nodes do + nodalMass[i] = massAccum[i].read() / 8.0; + + if debug { + writeln("ElemMass:"); + for mass in elemMass do writeln(mass); + + writeln("NodalMass:"); + for mass in nodalMass do writeln(mass); + } +} + + +proc initBoundaryConditions() { + var surfaceNode: [Nodes] int; + + forall n in XSym do + surfaceNode[n] = 1; + forall n in YSym do + surfaceNode[n] = 1; + forall n in ZSym do + surfaceNode[n] = 1; + + forall e in Elems do { + var mask: int; + for i in 1..nodesPerElem do + mask += surfaceNode[elemToNode[e][i]] << (i-1); + + // TODO: make an inlined function for this little idiom? (and below) + + if ((mask & 0x0f) == 0x0f) then elemBC[e] |= ZETA_M_SYMM; + if ((mask & 0xf0) == 0xf0) then elemBC[e] |= ZETA_P_SYMM; + if ((mask & 0x33) == 0x33) then elemBC[e] |= ETA_M_SYMM; + if ((mask & 0xcc) == 0xcc) then elemBC[e] |= ETA_P_SYMM; + if ((mask & 0x99) == 0x99) then elemBC[e] |= XI_M_SYMM; + if ((mask & 0x66) == 0x66) then elemBC[e] |= XI_P_SYMM; + } + + + // + // We find the octant corner by looking for the element with + // all three SYMM flags set, which will have the largest + // integral value. Thus, we can use a maxloc to identify it. 
+ // + const (check, loc) = maxloc reduce zip(elemBC, Elems); + + if debug then writeln("Found the octant corner at: ", loc); + + if (check != (XI_M_SYMM | ETA_M_SYMM | ZETA_M_SYMM)) then + halt("maxloc got a value of ", check, " at loc ", loc); + + // TODO: This is an example of an array that, in a distributed + // memory code, would typically be completely local and only storing + // the local nodes owned by the locale -- noting that some nodes + // are logically owned by multiple locales and therefore would + // redundantly be stored in both locales' surfaceNode arrays -- it's + // essentially local scratchspace that does not need to be communicated + // or kept coherent across locales. + // + + surfaceNode = 0; + + /* the free surfaces */ + + var freeSurface: sparse subdomain(Nodes); + + // initialize the free surface + initFreeSurface(freeSurface); + + forall n in freeSurface do + surfaceNode[n] = 1; + + forall e in Elems do { + var mask: int; + for i in 1..nodesPerElem do + mask += surfaceNode[elemToNode[e][i]] << (i-1); + + if ((mask & 0x0f) == 0x0f) then elemBC[e] |= ZETA_M_FREE; + if ((mask & 0xf0) == 0xf0) then elemBC[e] |= ZETA_P_FREE; + if ((mask & 0x33) == 0x33) then elemBC[e] |= ETA_M_FREE; + if ((mask & 0xcc) == 0xcc) then elemBC[e] |= ETA_P_FREE; + if ((mask & 0x99) == 0x99) then elemBC[e] |= XI_M_FREE; + if ((mask & 0x66) == 0x66) then elemBC[e] |= XI_P_FREE; + } + + if debug { + writeln("elemBC:"); + for b in elemBC do writeln(b); + } + + return loc; +} + + +/* Helper functions */ + +inline proc localizeNeighborNodes(eli: index(Elems), + x: [] real, ref x_local: 8*real, + y: [] real, ref y_local: 8*real, + z: [] real, ref z_local: 8*real) { + + for param i in 1..nodesPerElem { + const noi = elemToNode[eli][i]; + + x_local[i] = x[noi]; + y_local[i] = y[noi]; + z_local[i] = z[noi]; + } +} + +inline proc TripleProduct(x1, y1, z1, x2, y2, z2, x3, y3, z3) { + return x1*(y2*z3 - z2*y3) + x2*(z1*y3 - y1*z3) + x3*(y1*z2 - z1*y2); +} + + +proc CalcElemVolume(x, y, z) { + const dx61 = x[7] - x[2], + dy61 = y[7] - y[2], + dz61 = z[7] - z[2], + + dx70 = x[8] - x[1], + dy70 = y[8] - y[1], + dz70 = z[8] - z[1], + + dx63 = x[7] - x[4], + dy63 = y[7] - y[4], + dz63 = z[7] - z[4], + + dx20 = x[3] - x[1], + dy20 = y[3] - y[1], + dz20 = z[3] - z[1], + + dx50 = x[6] - x[1], + dy50 = y[6] - y[1], + dz50 = z[6] - z[1], + + dx64 = x[7] - x[5], + dy64 = y[7] - y[5], + dz64 = z[7] - z[5], + + dx31 = x[4] - x[2], + dy31 = y[4] - y[2], + dz31 = z[4] - z[2], + + dx72 = x[8] - x[3], + dy72 = y[8] - y[3], + dz72 = z[8] - z[3], + + dx43 = x[5] - x[4], + dy43 = y[5] - y[4], + dz43 = z[5] - z[4], + + dx57 = x[6] - x[8], + dy57 = y[6] - y[8], + dz57 = z[6] - z[8], + + dx14 = x[2] - x[5], + dy14 = y[2] - y[5], + dz14 = z[2] - z[5], + + dx25 = x[3] - x[6], + dy25 = y[3] - y[6], + dz25 = z[3] - z[6]; + + const volume = TripleProduct(dx31 + dx72, dx63, dx20, + dy31 + dy72, dy63, dy20, + dz31 + dz72, dz63, dz20) + + TripleProduct(dx43 + dx57, dx64, dx70, + dy43 + dy57, dy64, dy70, + dz43 + dz57, dz64, dz70) + + TripleProduct(dx14 + dx25, dx61, dx50, + dy14 + dy25, dy61, dy50, + dz14 + dz25, dz61, dz50); + + return volume / 12.0; +} + +proc InitStressTermsForElems(p, q, sigxx, sigyy, sigzz: [?D] real) { + forall i in D { + sigxx[i] = -p[i] - q[i]; + sigyy[i] = -p[i] - q[i]; + sigzz[i] = -p[i] - q[i]; + } +} + + +proc CalcElemShapeFunctionDerivatives(x: 8*real, y: 8*real, z: 8*real, + ref b_x: 8*real, + ref b_y: 8*real, + ref b_z: 8*real, + ref volume: real) { + + const fjxxi = .125 * ((x[7]-x[1]) + (x[6]-x[4]) 
- (x[8]-x[2]) - (x[5]-x[3])), + fjxet = .125 * ((x[7]-x[1]) - (x[6]-x[4]) + (x[8]-x[2]) - (x[5]-x[3])), + fjxze = .125 * ((x[7]-x[1]) + (x[6]-x[4]) + (x[8]-x[2]) + (x[5]-x[3])), + + fjyxi = .125 * ((y[7]-y[1]) + (y[6]-y[4]) - (y[8]-y[2]) - (y[5]-y[3])), + fjyet = .125 * ((y[7]-y[1]) - (y[6]-y[4]) + (y[8]-y[2]) - (y[5]-y[3])), + fjyze = .125 * ((y[7]-y[1]) + (y[6]-y[4]) + (y[8]-y[2]) + (y[5]-y[3])), + + fjzxi = .125 * ((z[7]-z[1]) + (z[6]-z[4]) - (z[8]-z[2]) - (z[5]-z[3])), + fjzet = .125 * ((z[7]-z[1]) - (z[6]-z[4]) + (z[8]-z[2]) - (z[5]-z[3])), + fjzze = .125 * ((z[7]-z[1]) + (z[6]-z[4]) + (z[8]-z[2]) + (z[5]-z[3])); + + /* compute cofactors */ + const cjxxi = (fjyet * fjzze) - (fjzet * fjyze), + cjxet = - (fjyxi * fjzze) + (fjzxi * fjyze), + cjxze = (fjyxi * fjzet) - (fjzxi * fjyet), + + cjyxi = - (fjxet * fjzze) + (fjzet * fjxze), + cjyet = (fjxxi * fjzze) - (fjzxi * fjxze), + cjyze = - (fjxxi * fjzet) + (fjzxi * fjxet), + + cjzxi = (fjxet * fjyze) - (fjyet * fjxze), + cjzet = - (fjxxi * fjyze) + (fjyxi * fjxze), + cjzze = (fjxxi * fjyet) - (fjyxi * fjxet); + + /* calculate partials : + this need only be done for l = 0,1,2,3 since , by symmetry , + (6,7,4,5) = - (0,1,2,3) . + */ + b_x[1] = - cjxxi - cjxet - cjxze; + b_x[2] = cjxxi - cjxet - cjxze; + b_x[3] = cjxxi + cjxet - cjxze; + b_x[4] = - cjxxi + cjxet - cjxze; + b_x[5] = -b_x[3]; + b_x[6] = -b_x[4]; + b_x[7] = -b_x[1]; + b_x[8] = -b_x[2]; + + b_y[1] = - cjyxi - cjyet - cjyze; + b_y[2] = cjyxi - cjyet - cjyze; + b_y[3] = cjyxi + cjyet - cjyze; + b_y[4] = - cjyxi + cjyet - cjyze; + b_y[5] = -b_y[3]; + b_y[6] = -b_y[4]; + b_y[7] = -b_y[1]; + b_y[8] = -b_y[2]; + + b_z[1] = - cjzxi - cjzet - cjzze; + b_z[2] = cjzxi - cjzet - cjzze; + b_z[3] = cjzxi + cjzet - cjzze; + b_z[4] = - cjzxi + cjzet - cjzze; + b_z[5] = -b_z[3]; + b_z[6] = -b_z[4]; + b_z[7] = -b_z[1]; + b_z[8] = -b_z[2]; + + /* calculate jacobian determinant (volume) */ + volume = 8.0 * ( fjxet * cjxet + fjyet * cjyet + fjzet * cjzet); +} + + +proc CalcElemNodeNormals(ref pfx: 8*real, ref pfy: 8*real, ref pfz: 8*real, + x: 8*real, y: 8*real, z: 8*real) { + + proc ElemFaceNormal(param n1, param n2, param n3, param n4) { + const bisectX0 = 0.5 * (x[n4] + x[n3] - x[n2] - x[n1]), + bisectY0 = 0.5 * (y[n4] + y[n3] - y[n2] - y[n1]), + bisectZ0 = 0.5 * (z[n4] + z[n3] - z[n2] - z[n1]), + bisectX1 = 0.5 * (x[n3] + x[n2] - x[n4] - x[n1]), + bisectY1 = 0.5 * (y[n3] + y[n2] - y[n4] - y[n1]), + bisectZ1 = 0.5 * (z[n3] + z[n2] - z[n4] - z[n1]), + areaX = 0.25 * (bisectY0 * bisectZ1 - bisectZ0 * bisectY1), + areaY = 0.25 * (bisectZ0 * bisectX1 - bisectX0 * bisectZ1), + areaZ = 0.25 * (bisectX0 * bisectY1 - bisectY0 * bisectX1); + + var rx, ry, rz: 8*real; //results + + (rx[n1], rx[n2], rx[n3], rx[n4]) = (areaX, areaX, areaX, areaX); + (ry[n1], ry[n2], ry[n3], ry[n4]) = (areaY, areaY, areaY, areaY); + (rz[n1], rz[n2], rz[n3], rz[n4]) = (areaZ, areaZ, areaZ, areaZ); + + return (rx, ry, rz); + } + + // calculate total normal from each face (faces are made up of + // combinations of nodes) + + (pfx, pfy, pfz) = ElemFaceNormal(1,2,3,4) + ElemFaceNormal(1,5,6,2) + + ElemFaceNormal(2,6,7,3) + ElemFaceNormal(3,7,8,4) + + ElemFaceNormal(4,8,5,1) + ElemFaceNormal(5,8,7,6); +} + + +proc SumElemStressesToNodeForces(b_x: 8*real, b_y: 8*real, b_z: 8*real, + stress_xx:real, + stress_yy:real, + stress_zz: real, + ref fx: 8*real, + ref fy: 8*real, + ref fz: 8*real) { + for param i in 1..8 { + fx[i] = -(stress_xx * b_x[i]); + fy[i] = -(stress_yy * b_y[i]); + fz[i] = -(stress_zz * b_z[i]); + } +} + +proc 
CalcElemVolumeDerivative(x: 8*real, y: 8*real, z: 8*real) { + + proc VoluDer(param n0, param n1, param n2, param n3, param n4, param n5) { + const ox = (y[n1] + y[n2]) * (z[n0] + z[n1]) + - (y[n0] + y[n1]) * (z[n1] + z[n2]) + + (y[n0] + y[n4]) * (z[n3] + z[n4]) + - (y[n3] + y[n4]) * (z[n0] + z[n4]) + - (y[n2] + y[n5]) * (z[n3] + z[n5]) + + (y[n3] + y[n5]) * (z[n2] + z[n5]), + oy = - (x[n1] + x[n2]) * (z[n0] + z[n1]) + + (x[n0] + x[n1]) * (z[n1] + z[n2]) + - (x[n0] + x[n4]) * (z[n3] + z[n4]) + + (x[n3] + x[n4]) * (z[n0] + z[n4]) + + (x[n2] + x[n5]) * (z[n3] + z[n5]) + - (x[n3] + x[n5]) * (z[n2] + z[n5]), + oz = - (y[n1] + y[n2]) * (x[n0] + x[n1]) + + (y[n0] + y[n1]) * (x[n1] + x[n2]) + - (y[n0] + y[n4]) * (x[n3] + x[n4]) + + (y[n3] + y[n4]) * (x[n0] + x[n4]) + + (y[n2] + y[n5]) * (x[n3] + x[n5]) + - (y[n3] + y[n5]) * (x[n2] + x[n5]); + + return (ox/12.0, oy/12.0, oz/12.0); + } + + var dvdx, dvdy, dvdz: 8*real; + + (dvdx[1], dvdy[1], dvdz[1]) = VoluDer(2,3,4,5,6,8); + (dvdx[4], dvdy[4], dvdz[4]) = VoluDer(1,2,3,8,5,7); + (dvdx[3], dvdy[3], dvdz[3]) = VoluDer(4,1,2,7,8,6); + (dvdx[2], dvdy[2], dvdz[2]) = VoluDer(3,4,1,6,7,5); + (dvdx[5], dvdy[5], dvdz[5]) = VoluDer(8,7,6,1,4,2); + (dvdx[6], dvdy[6], dvdz[6]) = VoluDer(5,8,7,2,1,3); + (dvdx[7], dvdy[7], dvdz[7]) = VoluDer(6,5,8,3,2,4); + (dvdx[8], dvdy[8], dvdz[8]) = VoluDer(7,6,5,4,3,1); + + return (dvdx, dvdy, dvdz); +} + +inline proc CalcElemFBHourglassForce(xd: 8*real, yd: 8*real, zd: 8*real, + hourgam: 8*(4*real), + coefficient: real, + ref hgfx: 8*real, + ref hgfy: 8*real, + ref hgfz: 8*real) { + var hx, hy, hz: 4*real; + + // reduction + for param i in 1..4 { + for param j in 1..8 { + hx[i] += hourgam[j][i] * xd[j]; + hy[i] += hourgam[j][i] * yd[j]; + hz[i] += hourgam[j][i] * zd[j]; + } + } + + for param i in 1..8 { + var shx, shy, shz: real; + for param j in 1..4 { + shx += hourgam[i][j] * hx[j]; + shy += hourgam[i][j] * hy[j]; + shz += hourgam[i][j] * hz[j]; + } + hgfx[i] = coefficient * shx; + hgfy[i] = coefficient * shy; + hgfz[i] = coefficient * shz; + } +} + + +proc CalcElemCharacteristicLength(x, y, z, volume) { + proc AreaFace(param p0, param p1, param p2, param p3) { + const fx = (x[p2] - x[p0]) - (x[p3] - x[p1]), + fy = (y[p2] - y[p0]) - (y[p3] - y[p1]), + fz = (z[p2] - z[p0]) - (z[p3] - z[p1]), + gx = (x[p2] - x[p0]) + (x[p3] - x[p1]), + gy = (y[p2] - y[p0]) + (y[p3] - y[p1]), + gz = (z[p2] - z[p0]) + (z[p3] - z[p1]), + area = (fx * fx + fy * fy + fz * fz) * + (gx * gx + gy * gy + gz * gz) - + (fx * gx + fy * gy + fz * gz) * + (fx * gx + fy * gy + fz * gz); + + return area ; + } + + const charLength = max(AreaFace(1, 2, 3, 4), + AreaFace(5, 6, 7, 8), + AreaFace(1, 2, 6, 5), + AreaFace(2, 3, 7, 6), + AreaFace(3, 4, 8, 7), + AreaFace(4, 1, 5, 8)); + + return 4.0 * volume / sqrt(charLength); +} + + +proc CalcElemVelocityGradient(xvel, yvel, zvel, pfx, pfy, pfz, + detJ, ref d: 6*real) { + const inv_detJ = 1.0 / detJ; + + d[1] = inv_detJ * ( pfx[1] * (xvel[1]-xvel[7]) + + pfx[2] * (xvel[2]-xvel[8]) + + pfx[3] * (xvel[3]-xvel[5]) + + pfx[4] * (xvel[4]-xvel[6]) ); + d[2] = inv_detJ * ( pfy[1] * (yvel[1]-yvel[7]) + + pfy[2] * (yvel[2]-yvel[8]) + + pfy[3] * (yvel[3]-yvel[5]) + + pfy[4] * (yvel[4]-yvel[6]) ); + d[3] = inv_detJ * ( pfz[1] * (zvel[1]-zvel[7]) + + pfz[2] * (zvel[2]-zvel[8]) + + pfz[3] * (zvel[3]-zvel[5]) + + pfz[4] * (zvel[4]-zvel[6]) ); + + const dyddx = inv_detJ * ( pfx[1] * (yvel[1]-yvel[7]) + + pfx[2] * (yvel[2]-yvel[8]) + + pfx[3] * (yvel[3]-yvel[5]) + + pfx[4] * (yvel[4]-yvel[6]) ), + + dxddy = inv_detJ * ( pfy[1] * 
(xvel[1]-xvel[7]) + + pfy[2] * (xvel[2]-xvel[8]) + + pfy[3] * (xvel[3]-xvel[5]) + + pfy[4] * (xvel[4]-xvel[6]) ), + + dzddx = inv_detJ * ( pfx[1] * (zvel[1]-zvel[7]) + + pfx[2] * (zvel[2]-zvel[8]) + + pfx[3] * (zvel[3]-zvel[5]) + + pfx[4] * (zvel[4]-zvel[6]) ), + + dxddz = inv_detJ * ( pfz[1] * (xvel[1]-xvel[7]) + + pfz[2] * (xvel[2]-xvel[8]) + + pfz[3] * (xvel[3]-xvel[5]) + + pfz[4] * (xvel[4]-xvel[6]) ), + + dzddy = inv_detJ * ( pfy[1] * (zvel[1]-zvel[7]) + + pfy[2] * (zvel[2]-zvel[8]) + + pfy[3] * (zvel[3]-zvel[5]) + + pfy[4] * (zvel[4]-zvel[6]) ), + + dyddz = inv_detJ * ( pfz[1] * (yvel[1]-yvel[7]) + + pfz[2] * (yvel[2]-yvel[8]) + + pfz[3] * (yvel[3]-yvel[5]) + + pfz[4] * (yvel[4]-yvel[6]) ); + + d[6] = 0.5 * ( dxddy + dyddx ); + d[5] = 0.5 * ( dxddz + dzddx ); + d[4] = 0.5 * ( dzddy + dyddz ); +} + + +proc CalcPressureForElems(p_new: [?D] real, bvc, pbvc, + e_old, compression, vnewc, + pmin: real, p_cut: real, eosvmax: real) { + + // + // TODO: Uncomment local once sparse domain is distributed + // + forall i in D /* do local */ { + const c1s = 2.0 / 3.0; + bvc[i] = c1s * (compression[i] + 1.0); + pbvc[i] = c1s; + } + + forall i in D { + p_new[i] = bvc[i] * e_old[i]; + + if abs(p_new[i]) < p_cut then p_new[i] = 0.0; + if vnewc[i] >= eosvmax then p_new[i] = 0.0; //impossible? + if p_new[i] < pmin then p_new[i] = pmin; + } +} + + +proc TimeIncrement() { + var targetdt = stoptime - time; + + if dtfixed <= 0.0 && cycle != 0 { //don't do this the first cycle + var olddt = deltatime, + newdt = 1.0e20; + + if dtcourant < newdt then newdt = dtcourant / 2.0; + if dthydro < newdt then newdt = 2.0/3.0 * dthydro; + + const ratio = newdt / olddt; + if ratio >= 1.0 { + if ratio < deltatimemultlb then newdt = olddt; + else if ratio > deltatimemultub then newdt = olddt * deltatimemultub; + } + if newdt > dtmax then newdt = dtmax; + + deltatime = newdt; + } + + /* TRY TO PREVENT VERY SMALL SCALING ON THE NEXT CYCLE */ + if targetdt > deltatime && targetdt < (4.0/3.0 * deltatime) { + targetdt = 2.0/3.0 * deltatime; + } + if targetdt < deltatime then deltatime = targetdt; + + time += deltatime; + cycle += 1; +} + +inline proc LagrangeLeapFrog() { + /* calculate nodal forces, accelerations, velocities, positions, with + * applied boundary conditions and slide surface considerations */ + LagrangeNodal(); + + /* calculate element quantities (i.e. velocity gradient & q), and update + * material states */ + LagrangeElements(); + + CalcTimeConstraintsForElems(); +} + + +inline proc LagrangeNodal() { + CalcForceForNodes(); + + CalcAccelerationForNodes(); + + ApplyAccelerationBoundaryConditionsForNodes(); + + CalcVelocityForNodes(deltatime, u_cut); + + CalcPositionForNodes(deltatime); +} + + +inline proc LagrangeElements() { + CalcLagrangeElements(); + + /* Calculate Q. 
(Monotonic q option requires communication) */ + CalcQForElems(); + + ApplyMaterialPropertiesForElems(); + + UpdateVolumesForElems(); +} + + +inline proc CalcTimeConstraintsForElems() { + /* evaluate time constraint */ + CalcCourantConstraintForElems(); + + /* check hydro constraint */ + CalcHydroConstraintForElems(); +} + + +inline proc computeDTF(indx) { + const myvdov = vdov[indx]; + + if myvdov == 0.0 then + return max(real); + + const myarealg = arealg[indx]; + var dtf = ss[indx]**2; + if myvdov < 0.0 then + dtf += qqc2 * myarealg**2 * myvdov**2; + dtf = sqrt(dtf); + dtf = myarealg / dtf; + + return dtf; +} + + +proc CalcCourantConstraintForElems() { + const val = min reduce [indx in MatElems] computeDTF(indx); + + if (val != max(real)) then + dtcourant = val; +} + + +inline proc calcDtHydroTmp(indx) { + const myvdov = vdov[indx]; + if (myvdov == 0.0) then + return max(real); + else + return dvovmax / (abs(myvdov)+1.0e-20); +} + + +proc CalcHydroConstraintForElems() { + const val = min reduce [indx in MatElems] calcDtHydroTmp(indx); + + if (val != max(real)) then + dthydro = val; +} + + +/* calculate nodal forces, accelerations, velocities, positions, with + * applied boundary conditions and slide surface considerations */ + +proc CalcForceForNodes() { + //zero out all forces + forall x in fx do x.write(0); + forall y in fy do y.write(0); + forall z in fz do z.write(0); + + /* Calcforce calls partial, force, hourq */ + CalcVolumeForceForElems(); + + /* Calculate Nodal Forces at domain boundaries */ + // this was commented out in C++ code, so left out here +} + +proc CalcVolumeForceForElems() { + var sigxx, sigyy, sigzz, determ: [Elems] real; + + /* Sum contributions to total stress tensor */ + InitStressTermsForElems(p, q, sigxx, sigyy, sigzz); + + /* call elemlib stress integration loop to produce nodal forces from + material stresses. */ + IntegrateStressForElems(sigxx, sigyy, sigzz, determ); + + /* check for negative element volume */ + forall e in Elems { + if determ[e] <= 0.0 then + halt("can't have negative volume (determ[", e, "]=", determ[e], ")"); + } + + CalcHourglassControlForElems(determ); +} + + +proc IntegrateStressForElems(sigxx, sigyy, sigzz, determ) { + forall k in Elems { + var b_x, b_y, b_z: 8*real; + var x_local, y_local, z_local: 8*real; + localizeNeighborNodes(k, x, x_local, y, y_local, z, z_local); + + var fx_local, fy_local, fz_local: 8*real; + + local { + /* Volume calculation involves extra work for numerical consistency. 
*/ + CalcElemShapeFunctionDerivatives(x_local, y_local, z_local, + b_x, b_y, b_z, determ[k]); + + CalcElemNodeNormals(b_x, b_y, b_z, x_local, y_local, z_local); + + SumElemStressesToNodeForces(b_x, b_y, b_z, sigxx[k], sigyy[k], sigzz[k], + fx_local, fy_local, fz_local); + } + + for (noi, t) in elemToNodesTuple(k) { + fx[noi].add(fx_local[t]); + fy[noi].add(fy_local[t]); + fz[noi].add(fz_local[t]); + } + } +} + + +proc CalcHourglassControlForElems(determ) { + var dvdx, dvdy, dvdz, x8n, y8n, z8n: [Elems] 8*real; + + forall eli in Elems { + /* Collect domain nodes to elem nodes */ + var x1, y1, z1: 8*real; + localizeNeighborNodes(eli, x, x1, y, y1, z, z1); + var pfx, pfy, pfz: 8*real; + + local { + /* load into temporary storage for FB Hour Glass control */ + (dvdx[eli], dvdy[eli], dvdz[eli]) = CalcElemVolumeDerivative(x1, y1, z1); + } + + x8n[eli] = x1; + y8n[eli] = y1; + z8n[eli] = z1; + + determ[eli] = volo[eli] * v[eli]; + } + + /* Do a check for negative volumes */ + forall e in Elems { + if v[e] <= 0.0 then + halt("can't have negative (or zero) volume. (in Hourglass, v[", e, "]=", v[e], ")"); + } + + if hgcoef > 0.0 { + CalcFBHourglassForceForElems(determ, x8n, y8n, z8n, dvdx, dvdy, dvdz); + } +} + + +const gammaCoef: 4*(8*real) = // WAS: [1..4, 1..8] real = + (( 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0), + ( 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0), + ( 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0), + (-1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0)); + +/* Calculates the Flanagan-Belytschko anti-hourglass force. */ +proc CalcFBHourglassForceForElems(determ, x8n, y8n, z8n, dvdx, dvdy, dvdz) { + + /* compute the hourglass modes */ + forall eli in Elems { + var hourgam: 8*(4*real); + const volinv = 1.0 / determ[eli]; + var ss1, mass1, volume13: real; + var hgfx, hgfy, hgfz: 8*real; + var coefficient: real; + + var xd1, yd1, zd1: 8*real; + localizeNeighborNodes(eli, xd, xd1, yd, yd1, zd, zd1); + + /* TODO: Can we enable this local block? */ + // local { + for param i in 1..4 { + var hourmodx, hourmody, hourmodz: real; + // reduction + for param j in 1..8 { + hourmodx += x8n[eli][j] * gammaCoef[i][j]; + hourmody += y8n[eli][j] * gammaCoef[i][j]; + hourmodz += z8n[eli][j] * gammaCoef[i][j]; + } + + for param j in 1..8 { + hourgam[j][i] = gammaCoef[i][j] - volinv * + (dvdx[eli][j] * hourmodx + + dvdy[eli][j] * hourmody + + dvdz[eli][j] * hourmodz); + } + } + + /* compute forces */ + /* store forces into h arrays (force arrays) */ + ss1 = ss[eli]; + mass1 = elemMass[eli]; + volume13 = cbrt(determ[eli]); + + coefficient = - hgcoef * 0.01 * ss1 * mass1 / volume13; + + CalcElemFBHourglassForce(xd1, yd1, zd1, hourgam, coefficient, + hgfx, hgfy, hgfz); + // } // end local + + for (noi,i) in elemToNodesTuple(eli) { + fx[noi].add(hgfx[i]); + fy[noi].add(hgfy[i]); + fz[noi].add(hgfz[i]); + } + } +} + + +proc CalcAccelerationForNodes() { + forall noi in Nodes do local { + xdd[noi] = fx[noi].read() / nodalMass[noi]; + ydd[noi] = fy[noi].read() / nodalMass[noi]; + zdd[noi] = fz[noi].read() / nodalMass[noi]; + } +} + + +proc ApplyAccelerationBoundaryConditionsForNodes() { + // TODO: Shouldn't we be able to write these as follows? 
+  //
+  // xdd[XSym] = 0.0;
+  // ydd[YSym] = 0.0;
+  // zdd[ZSym] = 0.0;
+
+  forall x in XSym do xdd[x] = 0.0;
+  forall y in YSym do ydd[y] = 0.0;
+  forall z in ZSym do zdd[z] = 0.0;
+}
+
+
+proc CalcVelocityForNodes(dt: real, u_cut: real) {
+  forall i in Nodes do local {
+    var xdtmp = xd[i] + xdd[i] * dt,
+        ydtmp = yd[i] + ydd[i] * dt,
+        zdtmp = zd[i] + zdd[i] * dt;
+    if abs(xdtmp) < u_cut then xdtmp = 0.0;
+    if abs(ydtmp) < u_cut then ydtmp = 0.0;
+    if abs(zdtmp) < u_cut then zdtmp = 0.0;
+    xd[i] = xdtmp;
+    yd[i] = ydtmp;
+    zd[i] = zdtmp;
+  }
+}
+
+
+proc CalcPositionForNodes(dt: real) {
+  forall ijk in Nodes {
+    x[ijk] += xd[ijk] * dt;
+    y[ijk] += yd[ijk] * dt;
+    z[ijk] += zd[ijk] * dt;
+  }
+}
+
+// sungeun: Temporary array reused throughout
+proc CalcLagrangeElements() {
+  var dxx, dyy, dzz: [Elems] real;
+
+  CalcKinematicsForElems(dxx, dyy, dzz, deltatime);
+
+  // element loop to do some stuff not included in the elemlib function.
+  forall k in Elems do local {
+    vdov[k] = dxx[k] + dyy[k] + dzz[k];
+    var vdovthird = vdov[k] / 3.0;
+    dxx[k] -= vdovthird;
+    dyy[k] -= vdovthird;
+    dzz[k] -= vdovthird;
+  }
+
+  // See if any volumes are negative, and take appropriate action.
+  forall e in Elems {
+    if vnew[e] <= 0.0 then
+      halt("can't have negative volume (vnew[", e, "]=", vnew[e], ")");
+  }
+}
+
+
+proc CalcKinematicsForElems(dxx, dyy, dzz, const dt: real) {
+  // loop over all elements
+  forall k in Elems {
+    var b_x, b_y, b_z: 8*real,
+        d: 6*real,
+        detJ: real;
+
+    //get nodal coordinates from global arrays and copy into local arrays
+    var x_local, y_local, z_local: 8*real;
+    localizeNeighborNodes(k, x, x_local, y, y_local, z, z_local);
+
+    //get nodal velocities from global arrays and copy into local arrays
+    var xd_local, yd_local, zd_local: 8*real;
+    localizeNeighborNodes(k, xd, xd_local, yd, yd_local, zd, zd_local);
+    var dt2 = 0.5 * dt; //wish this was local, too...
+
+    local {
+      //volume calculations
+      const volume = CalcElemVolume(x_local, y_local, z_local);
+      const relativeVolume = volume / volo[k];
+      vnew[k] = relativeVolume;
+      delv[k] = relativeVolume - v[k];
+
+      //set characteristic length
+      arealg[k] = CalcElemCharacteristicLength(x_local, y_local, z_local,
+                                               volume);
+
+      for param i in 1..8 {
+        x_local[i] -= dt2 * xd_local[i];
+        y_local[i] -= dt2 * yd_local[i];
+        z_local[i] -= dt2 * zd_local[i];
+      }
+
+      CalcElemShapeFunctionDerivatives(x_local, y_local, z_local,
+                                       b_x, b_y, b_z, detJ);
+
+      CalcElemVelocityGradient(xd_local, yd_local, zd_local, b_x, b_y, b_z,
+                               detJ, d);
+
+    }
+
+    // put velocity gradient quantities into their global arrays.
+    dxx[k] = d[1];
+    dyy[k] = d[2];
+    dzz[k] = d[3];
+  }
+}
+
+
+// sungeun: Temporary array reused throughout
+/* velocity gradient */
+var delv_xi, delv_eta, delv_zeta: [Elems] real;
+/* position gradient */
+var delx_xi, delx_eta, delx_zeta: [Elems] real;
+
+proc CalcQForElems() {
+  // MONOTONIC Q option
+
+  /* Calculate velocity gradients */
+  CalcMonotonicQGradientsForElems(delv_xi, delv_eta, delv_zeta,
+                                  delx_xi, delx_eta, delx_zeta);
+
+  /* Transfer velocity gradients in the first order elements */
+  /* problem->commElements->Transfer(CommElements::monoQ) ; */
+  CalcMonotonicQForElems(delv_xi, delv_eta, delv_zeta,
+                         delx_xi, delx_eta, delx_zeta);
+
+  /* Don't allow excessive artificial viscosity */
+  forall e in Elems {
+    if q[e] > qstop then
+      halt("Excessive artificial viscosity! (q[", e, "]=", q[e], ")");
+  }
+}
+
+
+// sungeun: Temporary array reused throughout
+var vnewc: [MatElems] real;
+
+/* Expose all of the variables needed for material evaluation */
+proc ApplyMaterialPropertiesForElems() {
+
+  forall i in MatElems do vnewc[i] = vnew[i];
+
+  if eosvmin != 0.0 then
+    [c in vnewc] if c < eosvmin then c = eosvmin;
+
+  if eosvmax != 0.0 then
+    [c in vnewc] if c > eosvmax then c = eosvmax;
+
+
+  // old comment: The following loop should compute min/max reductions;
+  // currently, race-y
+
+  forall matelm in MatElems {
+    var vc = v[matelm];
+    if eosvmin != 0.0 && vc < eosvmin then vc = eosvmin;
+    if eosvmax != 0.0 && vc > eosvmax then vc = eosvmax;
+    if vc <= 0.0 {
+      writeln("Volume error (in ApplyMaterialPropertiesForElems).");
+      exit(1);
+    }
+  }
+
+  EvalEOSForElems(vnewc);
+}
+
+
+proc UpdateVolumesForElems() {
+  forall i in Elems do local {
+    var tmpV = vnew[i];
+    if abs(tmpV-1.0) < v_cut then tmpV = 1.0;
+    v[i] = tmpV;
+  }
+}
+
+
+proc CalcMonotonicQGradientsForElems(delv_xi, delv_eta, delv_zeta,
+                                     delx_xi, delx_eta, delx_zeta) {
+  forall eli in Elems {
+    const ptiny = 1.0e-36;
+    var xl, yl, zl: 8*real;
+    localizeNeighborNodes(eli, x, xl, y, yl, z, zl);
+    var xvl, yvl, zvl: 8*real;
+    localizeNeighborNodes(eli, xd, xvl, yd, yvl, zd, zvl);
+
+    local {
+      const vol = volo[eli] * vnew[eli],
+            norm = 1.0 / (vol + ptiny);
+      var ax, ay, az, dxv, dyv, dzv: real;
+
+      const dxj = -0.25*((xl[1]+xl[2]+xl[6]+xl[5])-(xl[4]+xl[3]+xl[7]+xl[8])),
+            dyj = -0.25*((yl[1]+yl[2]+yl[6]+yl[5])-(yl[4]+yl[3]+yl[7]+yl[8])),
+            dzj = -0.25*((zl[1]+zl[2]+zl[6]+zl[5])-(zl[4]+zl[3]+zl[7]+zl[8])),
+
+            dxi = 0.25*((xl[2]+xl[3]+xl[7]+xl[6])-(xl[1]+xl[4]+xl[8]+xl[5])),
+            dyi = 0.25*((yl[2]+yl[3]+yl[7]+yl[6])-(yl[1]+yl[4]+yl[8]+yl[5])),
+            dzi = 0.25*((zl[2]+zl[3]+zl[7]+zl[6])-(zl[1]+zl[4]+zl[8]+zl[5])),
+
+            dxk = 0.25*((xl[5]+xl[6]+xl[7]+xl[8])-(xl[1]+xl[2]+xl[3]+xl[4])),
+            dyk = 0.25*((yl[5]+yl[6]+yl[7]+yl[8])-(yl[1]+yl[2]+yl[3]+yl[4])),
+            dzk = 0.25*((zl[5]+zl[6]+zl[7]+zl[8])-(zl[1]+zl[2]+zl[3]+zl[4]));
+
+      /* find delvk and delxk ( i cross j ) */
+
+      ax = dyi*dzj - dzi*dyj;
+      ay = dzi*dxj - dxi*dzj;
+      az = dxi*dyj - dyi*dxj;
+
+      delx_zeta[eli] = vol / sqrt(ax*ax + ay*ay + az*az + ptiny);
+
+      ax *= norm;
+      ay *= norm;
+      az *= norm;
+
+      dxv = 0.25*((xvl[5]+xvl[6]+xvl[7]+xvl[8])-(xvl[1]+xvl[2]+xvl[3]+xvl[4]));
+      dyv = 0.25*((yvl[5]+yvl[6]+yvl[7]+yvl[8])-(yvl[1]+yvl[2]+yvl[3]+yvl[4]));
+      dzv = 0.25*((zvl[5]+zvl[6]+zvl[7]+zvl[8])-(zvl[1]+zvl[2]+zvl[3]+zvl[4]));
+
+      delv_zeta[eli] = ax*dxv + ay*dyv + az*dzv;
+
+      /* find delxi and delvi ( j cross k ) */
+
+      ax = dyj*dzk - dzj*dyk;
+      ay = dzj*dxk - dxj*dzk;
+      az = dxj*dyk - dyj*dxk;
+
+      delx_xi[eli] = vol / sqrt(ax*ax + ay*ay + az*az + ptiny) ;
+
+      ax *= norm;
+      ay *= norm;
+      az *= norm;
+
+      dxv = 0.25*((xvl[2]+xvl[3]+xvl[7]+xvl[6])-(xvl[1]+xvl[4]+xvl[8]+xvl[5]));
+      dyv = 0.25*((yvl[2]+yvl[3]+yvl[7]+yvl[6])-(yvl[1]+yvl[4]+yvl[8]+yvl[5]));
+      dzv = 0.25*((zvl[2]+zvl[3]+zvl[7]+zvl[6])-(zvl[1]+zvl[4]+zvl[8]+zvl[5]));
+
+      delv_xi[eli] = ax*dxv + ay*dyv + az*dzv ;
+
+      /* find delxj and delvj ( k cross i ) */
+
+      ax = dyk*dzi - dzk*dyi;
+      ay = dzk*dxi - dxk*dzi;
+      az = dxk*dyi - dyk*dxi;
+
+      delx_eta[eli] = vol / sqrt(ax*ax + ay*ay + az*az + ptiny) ;
+
+      ax *= norm;
+      ay *= norm;
+      az *= norm;
+
+      dxv= -0.25*((xvl[1]+xvl[2]+xvl[6]+xvl[5])-(xvl[4]+xvl[3]+xvl[7]+xvl[8]));
+      dyv= -0.25*((yvl[1]+yvl[2]+yvl[6]+yvl[5])-(yvl[4]+yvl[3]+yvl[7]+yvl[8]));
+      dzv= -0.25*((zvl[1]+zvl[2]+zvl[6]+zvl[5])-(zvl[4]+zvl[3]+zvl[7]+zvl[8]));
+
+      delv_eta[eli] = ax*dxv + ay*dyv +
az*dzv ; + } /* local */ + } /* forall eli */ +} + + +proc CalcMonotonicQForElems(delv_xi, delv_eta, delv_zeta, + delx_xi, delx_eta, delx_zeta) { + //got rid of call through to "CalcMonotonicQRegionForElems" + + forall i in MatElems { + const ptiny = 1.0e-36; + const bcMask = elemBC[i]; + var norm, delvm, delvp: real; + + /* phixi */ + norm = 1.0 / (delv_xi[i] + ptiny); + + select bcMask & XI_M { + when 0 do delvm = delv_xi[lxim(i)]; + when XI_M_SYMM do delvm = delv_xi[i]; + when XI_M_FREE do delvm = 0.0; + } + select bcMask & XI_P { + when 0 do delvp = delv_xi[lxip(i)]; + when XI_P_SYMM do delvp = delv_xi[i]; + when XI_P_FREE do delvp = 0.0; + } + + delvm *= norm; + delvp *= norm; + + var phixi = 0.5 * (delvm + delvp); + + delvm *= monoq_limiter_mult; + delvp *= monoq_limiter_mult; + + if delvm < phixi then phixi = delvm; + if delvp < phixi then phixi = delvp; + if phixi < 0 then phixi = 0.0; + if phixi > monoq_max_slope then phixi = monoq_max_slope; + + /* phieta */ + norm = 1.0 / (delv_eta[i] + ptiny); + + select bcMask & ETA_M { + when 0 do delvm = delv_eta[letam[i]]; + when ETA_M_SYMM do delvm = delv_eta[i]; + when ETA_M_FREE do delvm = 0.0; + } + select bcMask & ETA_P { + when 0 do delvp = delv_eta[letap[i]]; + when ETA_P_SYMM do delvp = delv_eta[i]; + when ETA_P_FREE do delvp = 0.0; + } + + delvm = delvm * norm; + delvp = delvp * norm; + + var phieta = 0.5 * (delvm + delvp); + + delvm *= monoq_limiter_mult; + delvp *= monoq_limiter_mult; + + if delvm < phieta then phieta = delvm; + if delvp < phieta then phieta = delvp; + if phieta < 0.0 then phieta = 0.0; + if phieta > monoq_max_slope then phieta = monoq_max_slope; + + /* phizeta */ + norm = 1.0 / (delv_zeta[i] + ptiny); + + select bcMask & ZETA_M { + when 0 do delvm = delv_zeta[lzetam[i]]; + when ZETA_M_SYMM do delvm = delv_zeta[i]; + when ZETA_M_FREE do delvm = 0.0; + } + select bcMask & ZETA_P { + when 0 do delvp = delv_zeta[lzetap[i]]; + when ZETA_P_SYMM do delvp = delv_zeta[i]; + when ZETA_P_FREE do delvp = 0.0; + } + + delvm = delvm * norm; + delvp = delvp * norm; + + var phizeta = 0.5 * (delvm + delvp); + + delvm *= monoq_limiter_mult; + delvp *= monoq_limiter_mult; + + if delvm < phizeta then phizeta = delvm; + if delvp < phizeta then phizeta = delvp; + if phizeta < 0.0 then phizeta = 0.0; + if phizeta > monoq_max_slope then phizeta = monoq_max_slope; + + /* Remove length scale */ + var qlin, qquad: real; + if vdov[i] > 0.0 { + qlin = 0.0; + qquad = 0.0; + } else { + var delvxxi = delv_xi[i] * delx_xi[i], + delvxeta = delv_eta[i] * delx_eta[i], + delvxzeta = delv_zeta[i] * delx_zeta[i]; + + if delvxxi > 0.0 then delvxxi = 0.0; + if delvxeta > 0.0 then delvxeta = 0.0; + if delvxzeta > 0.0 then delvxzeta = 0.0; + + const rho = elemMass[i] / (volo[i] * vnew[i]); + + qlin = -qlc_monoq * rho * + ( delvxxi * (1.0 - phixi) + + delvxeta * (1.0 - phieta) + + delvxzeta * (1.0 - phizeta)); + + qquad = qqc_monoq * rho * + ( delvxxi**2 * (1.0 - phixi**2) + + delvxeta**2 * (1.0 - phieta**2) + + delvxzeta**2 * (1.0 - phizeta**2)); + } + qq[i] = qquad; + ql[i] = qlin; + + } +} + + +proc EvalEOSForElems(vnewc) { + const rho0 = refdens; + + var e_old, delvc, p_old, q_old, compression, compHalfStep, + qq_old, ql_old, work, p_new, e_new, q_new, bvc, pbvc: [Elems] real; + + /* compress data, minimal set */ + forall i in MatElems { + e_old[i] = e[i]; + delvc[i] = delv[i]; + p_old[i] = p[i]; + q_old[i] = q[i]; + qq_old[i] = qq[i]; + ql_old[i] = ql[i]; + } + + // + // TODO: Uncomment local once sparse domain is distributed + // + forall i in Elems 
/* do local */ {
+    compression[i] = 1.0 / vnewc[i] - 1.0;
+    const vchalf = vnewc[i] - delvc[i] * 0.5;
+    compHalfStep[i] = 1.0 / vchalf - 1.0;
+  }
+
+  /* Check for v > eosvmax or v < eosvmin */
+  // (note: I think this was already checked for in calling function!)
+  if eosvmin != 0.0 {
+    forall i in Elems {
+      if vnewc[i] <= eosvmin then compHalfStep[i] = compression[i];
+    }
+  }
+  if eosvmax != 0.0 {
+    forall i in Elems {
+      if vnewc[i] >= eosvmax {
+        p_old[i] = 0.0;
+        compression[i] = 0.0;
+        compHalfStep[i] = 0.0;
+      }
+    }
+  }
+
+  CalcEnergyForElems(p_new, e_new, q_new, bvc, pbvc,
+                     p_old, e_old, q_old, compression, compHalfStep,
+                     vnewc, work, delvc, qq_old, ql_old);
+
+  forall i in MatElems {
+    p[i] = p_new[i];
+    e[i] = e_new[i];
+    q[i] = q_new[i];
+  }
+
+  CalcSoundSpeedForElems(vnewc, rho0, e_new, p_new, pbvc, bvc);
+}
+
+
+proc CalcEnergyForElems(p_new, e_new, q_new, bvc, pbvc,
+                        p_old, e_old, q_old, compression, compHalfStep,
+                        vnewc, work, delvc, qq_old, ql_old) {
+  // TODO: might need to move these consts into foralls or global
+  // Otherwise, they live on Locale0 and everyone else has to do
+  // remote reads. OR: Check if they're remote value forwarded.
+  const rho0 = refdens;
+  const sixth = 1.0 / 6.0;
+
+  var pHalfStep: [MatElems] real;
+
+  forall i in Elems {
+    e_new[i] = e_old[i] - 0.5 * delvc[i] * (p_old[i] + q_old[i])
+             + 0.5 * work[i];
+    if e_new[i] < emin then e_new[i] = emin;
+  }
+
+  CalcPressureForElems(pHalfStep, bvc, pbvc, e_new, compHalfStep,
+                       vnewc, pmin, p_cut, eosvmax);
+
+  forall i in Elems {
+    const vhalf = 1.0 / (1.0 + compHalfStep[i]);
+
+    if delvc[i] > 0.0 {
+      q_new[i] = 0.0;
+    } else {
+      var ssc = (pbvc[i] * e_new[i] + vhalf**2 * bvc[i] * pHalfStep[i]) / rho0;
+      if ssc <= 0.0 then ssc = 0.333333e-36;
+                    else ssc = sqrt(ssc);
+      q_new[i] = ssc * ql_old[i] + qq_old[i];
+    }
+
+    e_new[i] += 0.5 * delvc[i]
+      * (3.0*(p_old[i] + q_old[i]) - 4.0*(pHalfStep[i] + q_new[i]));
+  }
+  forall i in Elems {
+    e_new[i] += 0.5 * work[i];
+    if abs(e_new[i]) < e_cut then e_new[i] = 0.0;
+    if e_new[i] < emin then e_new[i] = emin;
+  }
+
+  CalcPressureForElems(p_new, bvc, pbvc, e_new, compression, vnewc, pmin,
+                       p_cut, eosvmax);
+
+  forall i in Elems {
+    var q_tilde:real;
+
+    if delvc[i] > 0.0 {
+      q_tilde = 0.0;
+    } else {
+      var ssc = (pbvc[i] * e_new[i] + vnewc[i]**2 * bvc[i] * p_new[i] ) / rho0;
+      if ssc <= 0.0 then ssc = 0.333333e-36;
+                    else ssc = sqrt(ssc);
+      q_tilde = ssc * ql_old[i] + qq_old[i];
+    }
+
+    e_new[i] -= (7.0*(p_old[i] + q_old[i])
+                 - 8.0*(pHalfStep[i] + q_new[i])
+                 + (p_new[i] + q_tilde)) * delvc[i] * sixth;
+    if abs(e_new[i]) < e_cut then e_new[i] = 0.0;
+    if e_new[i] < emin then e_new[i] = emin;
+  }
+
+  CalcPressureForElems(p_new, bvc, pbvc, e_new, compression, vnewc, pmin,
+                       p_cut, eosvmax);
+
+
+  //
+  // TODO: Uncomment local once sparse domain is distributed
+  //
+  forall i in Elems /* do local */ {
+    if delvc[i] <= 0.0 {
+      var ssc = (pbvc[i] * e_new[i] + vnewc[i]**2 * bvc[i] * p_new[i] ) / rho0;
+      if ssc <= 0.0 then ssc = 0.333333e-36;
+                    else ssc = sqrt(ssc);
+      q_new[i] = ssc * ql_old[i] + qq_old[i];
+      if abs(q_new[i]) < q_cut then q_new[i] = 0.0;
+    }
+  }
+}
+
+
+proc CalcSoundSpeedForElems(vnewc, rho0:real, enewc, pnewc, pbvc, bvc) {
+  // TODO: Open question: If we had multiple materials, should (a) ss
+  // be zeroed and accumulated into, and (b) updated atomically to
+  // avoid losing updates?
(Jeff will go back and think on this) + // + forall i in MatElems { + var ssTmp = (pbvc[i] * enewc[i] + vnewc[i]**2 * bvc[i] * pnewc[i]) / rho0; + if ssTmp <= 1.111111e-36 then ssTmp = 1.111111e-36; + ss[i] = sqrt(ssTmp); + } +} + + +iter elemToNodes(elem) { + for param i in 1..nodesPerElem do + yield elemToNode[elem][i]; +} + +iter elemToNodesTuple(e) { + for i in 1..nodesPerElem do + yield (elemToNode[e][i], i); +} + + +proc deprint(title:string, x:[?D] real, y:[D]real, z:[D]real) { + writeln(title); + for i in D { + writeln(format("%3d", i), ": ", + format("%3.4e", x[i]), " ", + format("%3.4e", y[i]), " ", + format("%3.4e", z[i])); + } +} + + diff --git a/samples/Chapel/nbody.chpl b/samples/Chapel/nbody.chpl new file mode 100644 index 00000000..f46e4f68 --- /dev/null +++ b/samples/Chapel/nbody.chpl @@ -0,0 +1,147 @@ +/* The Computer Language Benchmarks Game + http://benchmarksgame.alioth.debian.org/ + + contributed by Albert Sidelnik + modified by Brad Chamberlain +*/ + + +// +// The number of timesteps to simulate; may be set via the command-line +// +config const n = 10000; + +// +// Constants representing pi, the solar mass, and the number of days per year +// +const pi = 3.141592653589793, + solarMass = 4 * pi**2, + daysPerYear = 365.24; + +// +// a record representing one of the bodies in the solar system +// +record body { + var pos: 3*real; + var v: 3*real; + var mass: real; // does not change after it is set up +} + +// +// the array of bodies that we'll be simulating +// +var bodies = [/* sun */ + new body(mass = solarMass), + + /* jupiter */ + new body(pos = ( 4.84143144246472090e+00, + -1.16032004402742839e+00, + -1.03622044471123109e-01), + v = ( 1.66007664274403694e-03 * daysPerYear, + 7.69901118419740425e-03 * daysPerYear, + -6.90460016972063023e-05 * daysPerYear), + mass = 9.54791938424326609e-04 * solarMass), + + /* saturn */ + new body(pos = ( 8.34336671824457987e+00, + 4.12479856412430479e+00, + -4.03523417114321381e-01), + v = (-2.76742510726862411e-03 * daysPerYear, + 4.99852801234917238e-03 * daysPerYear, + 2.30417297573763929e-05 * daysPerYear), + mass = 2.85885980666130812e-04 * solarMass), + + /* uranus */ + new body(pos = ( 1.28943695621391310e+01, + -1.51111514016986312e+01, + -2.23307578892655734e-01), + v = ( 2.96460137564761618e-03 * daysPerYear, + 2.37847173959480950e-03 * daysPerYear, + -2.96589568540237556e-05 * daysPerYear), + mass = 4.36624404335156298e-05 * solarMass), + + /* neptune */ + new body(pos = ( 1.53796971148509165e+01, + -2.59193146099879641e+01, + 1.79258772950371181e-01), + v = ( 2.68067772490389322e-03 * daysPerYear, + 1.62824170038242295e-03 * daysPerYear, + -9.51592254519715870e-05 * daysPerYear), + mass = 5.15138902046611451e-05 * solarMass) + ]; + +// +// the number of bodies to be simulated +// +const numbodies = bodies.numElements; + +// +// The computation involves initializing the sun's velocity, +// writing the initial energy, advancing the system through 'n' +// timesteps, and writing the final energy. 
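An editorial aside for readers new to Chapel (not part of the patch): arithmetic operators on homogeneous tuples such as the 3*real fields above are applied elementwise, which is what lets advance() below update positions and velocities without per-component loops (e.g. b.pos += dt * b.v). A minimal sketch with made-up values:

var pos: 3*real = (1.0, 2.0, 3.0),
    vel: 3*real = (0.1, 0.2, 0.3);
pos += 0.5 * vel;   // elementwise: pos is now (1.05, 2.1, 3.15)
writeln(pos);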
+// +proc main() { + initSun(); + + writef("%.9r\n", energy()); + for 1..n do + advance(0.01); + writef("%.9r\n", energy()); +} + +// +// compute the sun's initial velocity +// +proc initSun() { + const p = + reduce (for b in bodies do (b.v * b.mass)); + bodies[1].v = -p / solarMass; +} + +// +// advance the positions and velocities of all the bodies +// +proc advance(dt) { + for i in 1..numbodies { + for j in i+1..numbodies { + updateVelocities(bodies[i], bodies[j]); + + inline proc updateVelocities(ref b1, ref b2) { + const dpos = b1.pos - b2.pos, + mag = dt / sqrt(sumOfSquares(dpos))**3; + + b1.v -= dpos * b2.mass * mag; + b2.v += dpos * b1.mass * mag; + } + } + } + + for b in bodies do + b.pos += dt * b.v; +} + +// +// compute the energy of the bodies +// +proc energy() { + var e = 0.0; + + for i in 1..numbodies { + const b1 = bodies[i]; + + e += 0.5 * b1.mass * sumOfSquares(b1.v); + + for j in i+1..numbodies { + const b2 = bodies[j]; + + e -= (b1.mass * b2.mass) / sqrt(sumOfSquares(b1.pos - b2.pos)); + } + } + + return e; +} + +// +// a helper routine to compute the sum of squares of a 3-tuple's components +// +inline proc sumOfSquares(x) + return x(1)**2 + x(2)**2 + x(3)**2; diff --git a/samples/Chapel/quicksort.chpl b/samples/Chapel/quicksort.chpl new file mode 100644 index 00000000..33d9722c --- /dev/null +++ b/samples/Chapel/quicksort.chpl @@ -0,0 +1,145 @@ +// +// An example of a parallel quick sort implementation that uses +// "cobegin" to make each recursive call in parallel and "serial" to +// limit the number of threads. +// + +use Random, Time; // for random number generation and the Timer class + +var timer: Timer; // to time the sort + +config var n: int = 2**15; // the size of the array to be sorted +config var thresh: int = 1; // the recursive depth to serialize +config var verbose: int = 0; // print out this many elements in array +config var timing: bool = true; // set timing to false to disable timer + +var A: [1..n] real; // array of real numbers + +// +// initialize array with random numbers +// +fillRandom(A); + +// +// print out front of array if verbose flag is set +// +if verbose > 0 then + writeln("A[1..", verbose, "] = ", A[1..verbose]); + +// +// start timer, call parallel quick sort routine, stop timer +// +if timing then timer.start(); +pqsort(A, thresh); +if timing then timer.stop(); + +// +// report sort time +// +if timing then writeln("sorted in ", timer.elapsed(), " seconds"); + +// +// print out front of array if verbose flag is set +// values should now be in sorted order +// +if verbose > 0 then + writeln("A[1..", verbose, "] = ", A[1..verbose]); + +// +// verify that array is sorted or halt +// +for i in 2..n do + if A(i) < A(i-1) then + halt("A(", i-1, ") == ", A(i-1), " > A(", i, ") == ", A(i)); + +writeln("verification success"); + +// +// pqsort -- parallel quick sort +// +// arr: generic 1D array of values (real, int, ...) 
+// thresh: number of recursive calls to make before serializing +// low: lower bound of array to start sort at, defaults to whole array +// high: upper bound of array to stop sort at, defaults to whole array +// +proc pqsort(arr: [], + thresh: int, + low: int = arr.domain.low, + high: int = arr.domain.high) where arr.rank == 1 { + + // + // base case: arr[low..high] is small enough to bubble sort + // + if high - low < 8 { + bubbleSort(arr, low, high); + return; + } + + // + // determine pivot and partition arr[low..high] + // + const pivotVal = findPivot(); + const pivotLoc = partition(pivotVal); + + // + // make recursive calls to parallel quick sort each unsorted half of + // the array; if thresh is 0 or less, start executing conquer tasks + // serially + // + serial thresh <= 0 do cobegin { + pqsort(arr, thresh-1, low, pivotLoc-1); + pqsort(arr, thresh-1, pivotLoc+1, high); + } + + // + // findPivot -- helper routine to find pivot value using simple + // median-of-3 method, returns pivot value + // + proc findPivot() { + const mid = low + (high-low+1) / 2; + + if arr(mid) < arr(low) then arr(mid) <=> arr(low); + if arr(high) < arr(low) then arr(high) <=> arr(low); + if arr(high) < arr(mid) then arr(high) <=> arr(mid); + + const pivotVal = arr(mid); + arr(mid) = arr(high-1); + arr(high-1) = pivotVal; + + return pivotVal; + } + + // + // partition -- helper routine to partition array such that all + // values less than pivot are to its left and all + // values greater than pivot are to its right, returns + // pivot location + // + proc partition(pivotVal) { + var ilo = low, ihi = high-1; + while (ilo < ihi) { + do { ilo += 1; } while arr(ilo) < pivotVal; + do { ihi -= 1; } while pivotVal < arr(ihi); + if (ilo < ihi) { + arr(ilo) <=> arr(ihi); + } + } + arr(high-1) = arr(ilo); + arr(ilo) = pivotVal; + return ilo; + } +} + +// +// bubbleSort -- bubble sort for base case of quick sort +// +// arr: generic 1D array of values (real, int, ...) +// low: lower bound of array to start sort at +// high: upper bound of array to stop sort at +// +proc bubbleSort(arr: [], low: int, high: int) where arr.rank == 1 { + for i in low..high do + for j in low..high-1 do + if arr(j) > arr(j+1) then + arr(j) <=> arr(j+1); +}
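A closing note on the task constructs that quicksort.chpl relies on (a standalone editorial sketch, not part of the patch; sumHalves, parts, and nums are made-up names): cobegin runs each of its component statements as a concurrent task and waits for them to finish, while a serial clause whose condition is true suppresses that task creation, which is how pqsort above caps parallelism once its recursion-depth threshold is used up. The same pattern in a self-contained program:

// Sum an array by splitting it in half, spawning tasks until 'depth' runs out.
proc sumHalves(arr: [] int, lo: int, hi: int, depth: int): int {
  // small base case: reduce serially
  if hi - lo < 4 then
    return + reduce arr[lo..hi];

  const mid = lo + (hi - lo) / 2;
  var parts: [0..1] int;

  // once 'depth' is exhausted, 'serial' makes the cobegin run its tasks one by one
  serial depth <= 0 do cobegin {
    parts[0] = sumHalves(arr, lo, mid, depth - 1);
    parts[1] = sumHalves(arr, mid + 1, hi, depth - 1);
  }

  return parts[0] + parts[1];
}

var nums: [1..16] int = [i in 1..16] i;
writeln(sumHalves(nums, 1, 16, 2));   // prints 136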