Add Chapel parallel programming language.

Includes several example programs from source distribution.
This commit is contained in:
Thomas Van Doren
2014-07-02 14:51:15 -07:00
parent e274196441
commit a6ccce7b76
7 changed files with 3097 additions and 3 deletions

View File

@@ -0,0 +1,304 @@
//
// Distributions Primer
//
// This primer demonstrates uses of some of Chapel's standard
// distributions. To use these distributions in a Chapel program,
// the respective module must be used:
//
use BlockDist, CyclicDist, BlockCycDist, ReplicatedDist;
use DimensionalDist2D, ReplicatedDim, BlockCycDim;
//
// For each distribution, we'll create a distributed domain and array
// and then initialize it just to give a brief flavor of how the
// distribution maps across locales. Running this example on 6
// locales does a nice job of illustrating the distribution
// characteristics.
//
// All of these distributions support options to map to a different
// virtual locale grid than the one used by default (a
// multidimensional factoring of the built-in Locales array), as well
// as to control the amount of parallelism used in data parallel
// loops. See the Standard Distributions chapter of the language spec
// for more details.
//
//
// Make the program size configurable from the command line.
//
config const n = 8;

//
// Space is the plain 2-dimensional n x n index set from which each of
// the distributed domains in this primer is derived.
//
const Space = {1..n, 1..n};

//
// Block distribution: a bounding box in n-dimensional space is cut
// into roughly equal portions, one per locale of the target locale
// array (viewed as an n-dimensional virtual locale grid).  The box is
// only used to compute the blocking of space; a domain declared over
// a Block distribution may still store indices outside of it.
//
// BlockSpace is a 2-dimensional Block-distributed domain and BA is a
// Block-distributed array declared over that domain.
//
const BlockSpace = Space dmapped Block(boundingBox=Space);
var BA: [BlockSpace] int;

//
// Store into every element the ID of the locale that owns that
// index/element/iteration, so the printed array shows how the index
// set is spread across the locales.
//
forall elem in BA {
  elem = here.id;
}

//
// Print the Block-distributed array to see the partitioning.
//
writeln("Block Array Index Map");
writeln(BA);
writeln();
//
// Most of Chapel's standard distributions support an optional
// targetLocales argument that permits you to pass in your own
// array of locales to be targeted. In general, the targetLocales
// argument should match the rank of the distribution. So for
// example, to map a Block to a [numLocales x 1] view of the
// locale set, one could do something like this:
//
// We start by creating our own array of the locale values. Here
// we use the standard array reshape function for convenience,
// but more generally, this array could be accessed/assigned like any
// other.
//
var MyLocaleView = {0..numLocales-1, 1..1};
var MyLocales: [MyLocaleView] locale = reshape(Locales, MyLocaleView);

//
// A distributed domain/array pair that targets this
// [numLocales x 1] view of the locales:
//
const BlockSpace2 = Space dmapped Block(boundingBox=Space,
                                        targetLocales=MyLocales);
var BA2: [BlockSpace2] int;

//
// Same locale-ID fill as before, to verify where everything ended up:
//
forall elem in BA2 {
  elem = here.id;
}

writeln("Block Array Index Map");
writeln(BA2);
writeln();
//
// Next, we'll perform a similar computation for the Cyclic distribution.
// Cyclic distributions start at a designated n-dimensional index and
// distribute the n-dimensional space across an n-dimensional array
// of locales in a round-robin fashion (in each dimension). As with
// the Block distribution, domains may be declared using the
// distribution that have lower indices than the starting index; that
// value should just be considered a parameterization of how the
// distribution is defined.
//
// Cyclic-distributed domain whose round-robin dealing starts at the
// low corner of Space, plus an array declared over it.
const CyclicSpace = Space dmapped Cyclic(startIdx=Space.low);
var CA: [CyclicSpace] int;

// Fill each element with the ID of the locale running its iteration.
forall elem in CA {
  elem = here.id;
}

writeln("Cyclic Array Index Map");
writeln(CA);
writeln();
//
// Next, we'll declare a Block-Cyclic distribution. These
// distributions also deal out indices in a round-robin fashion,
// but rather than dealing out singleton indices, they deal out blocks
// of indices. Thus, the BlockCyclic distribution is parameterized
// by a starting index (as with Cyclic) and a block size (per
// dimension) specifying how large the chunks to be dealt out are.
//
// Block-Cyclic domain: dealing starts at the low corner of Space and
// hands out 2 x 3 blocks of indices.
const BlkCycSpace = Space dmapped BlockCyclic(startIdx=Space.low,
                                              blocksize=(2, 3));
var BCA: [BlkCycSpace] int;

// Fill each element with the ID of the locale running its iteration.
forall elem in BCA {
  elem = here.id;
}

writeln("Block-Cyclic Array Index Map");
writeln(BCA);
writeln();
//
// The ReplicatedDist distribution is different: each of the
// original domain's indices - and the corresponding array elements -
// is replicated onto each locale. (Note: consistency among these
// array replicands is NOT maintained automatically.)
//
// This replication is observable in some cases but not others,
// as shown below. Note: this behavior may change in the future.
//
const ReplicatedSpace = Space dmapped ReplicatedDist();
var RA: [ReplicatedSpace] int;

// Here the replication is observable: the forall visits each replicand.
forall replica in RA {
  replica = here.id;
}

writeln("Replicated Array Index Map, ", RA.numElements, " elements total");
writeln(RA);
writeln();
//
// The replication is observable when the replicated array is
// on the left-hand side. If the right-hand side is not replicated,
// it is copied into each replicand.
// We illustrate this using a non-distributed array.
//
// Non-distributed array whose element at (row, col) holds row*100 + col.
var A: [Space] int = [(row, col) in Space] row*100 + col;

// Whole-array assignment with the replicated array on the left-hand side.
RA = A;

writeln("Replicated Array after being array-assigned into");
writeln(RA);
writeln();
//
// Analogously, each replicand will be visited and
// other participating expressions will be computed on each locale
// (a) when the replicated array is assigned a scalar:
// RA = 5;
// (b) when it appears first in a zippered forall loop:
// forall (ra, a) in zip(RA, A) do ...;
// (c) when it appears in a for loop:
// for ra in RA do ...;
//
// Zippering (RA,A) or (A,RA) in a 'for' loop will generate
// an error due to their different number of elements.
// Let RA store the Index Map again, for the examples below.
forall replica in RA {
  replica = here.id;
}

//
// Only the local replicand is accessed - replication is NOT observable
// and consistency is NOT maintained - when:
// (a) the replicated array is indexed - an individual element is read...
//
on Locales[0] {
  writeln("on ", here, ": ", RA(Space.low));
}
on Locales[LocaleSpace.high] {
  writeln("on ", here, ": ", RA(Space.low));
}
writeln();

// ...or an individual element is written;
on Locales[LocaleSpace.high] {
  RA(Space.low) = 7777;
}

writeln("Replicated Array after being indexed into");
writeln(RA);
writeln();

//
// (b) the replicated array is on the right-hand side of an assignment...
//
on Locales[LocaleSpace.high] {
  A = RA + 4;
}

writeln("Non-Replicated Array after assignment from Replicated Array + 4");
writeln(A);
writeln();
//
// (c) ...or, generally, the replicated array or domain participates
// in a zippered forall loop, but not in the first position.
// The loop could look like:
//
// forall (a, (i,j), ra) in zip(A, ReplicatedSpace, RA) do ...;
//
//
// The DimensionalDist2D distribution lets us build a 2D distribution
// as a composition of specifiers for individual dimensions.
// Under such a "dimensional" distribution each dimension is handled
// independently of the other.
//
// The dimension specifiers are similar to the corresponding multi-dimensional
// distributions in constructor arguments and index-to-locale mapping rules.
// However, instead of an array of locales, a specifier constructor
// accepts just the number of locales that the indices in the corresponding
// dimension will be distributed across.
//
// The DimensionalDist2D constructor requires:
// * an [0..nl1-1, 0..nl2-1] array of locales, where
// nl1 and nl2 are the number of locales in each dimension, and
// * two dimension specifiers, created for nl1 and nl2 locale counts, resp.
//
// Presently, the following dimension specifiers are available
// (shown here with their constructor arguments):
//
// * ReplicatedDim(numLocales)
// * BlockDim(numLocales, boundingBoxLow, boundingBoxHigh)
// * BlockCyclicDim(lowIdx, blockSize, numLocales)
//
//
// The following example creates a dimensional distribution that
// replicates over 2 locales (when available) in the first dimension
// and distributes using block-cyclic distribution in the second dimension.
// The example computes nl1 and nl2 and reshapes MyLocales correspondingly.
//
var (nl1, nl2) = if numLocales == 1 then (1, 1) else (2, numLocales/2);

// Reshape the locale view (and with it MyLocales) into an nl1 x nl2 grid.
MyLocaleView = {0..#nl1, 0..#nl2};
MyLocales = reshape(Locales[0..#nl1*nl2], MyLocaleView);

const DimReplicatedBlockcyclicSpace = Space
  dmapped DimensionalDist2D(MyLocales,
                            new ReplicatedDim(numLocales = nl1),
                            new BlockCyclicDim(numLocales = nl2,
                                               lowIdx = 1, blockSize = 2));

var DRBA: [DimReplicatedBlockcyclicSpace] int;

// The ReplicatedDim specifier always accesses the local replicand.
// (This differs from how the ReplicatedDist distribution works.)
//
// Visit each replicand by running once on a locale from every row of
// MyLocales.  The behavior is the same regardless of the second index
// into MyLocales below.
for row in 0..#nl1 {
  on MyLocales[row, 0] {

    forall elem in DRBA {
      elem = here.id;
    }

    writeln("Dimensional2D(Replicated,BlockCyclic) Array Index Map",
            " from ", here);

    // Technicality: 'writeln(DRBA)' would read DRBA always on Locale 0.
    // To show what DRBA contains on the current locale, copy it into
    // an array mapped using the default distribution and print the copy.
    const LocalView: [Space] int = DRBA;
    writeln(LocalView);
    writeln();
  }
}

View File

@@ -0,0 +1 @@
// print 'Hello, world!' to the console
writeln("Hello, world!");

1692
samples/Chapel/lulesh.chpl Normal file

File diff suppressed because it is too large Load Diff

147
samples/Chapel/nbody.chpl Normal file
View File

@@ -0,0 +1,147 @@
/* The Computer Language Benchmarks Game
http://benchmarksgame.alioth.debian.org/
contributed by Albert Sidelnik
modified by Brad Chamberlain
*/
//
// Number of timesteps to simulate; can be overridden on the command line
//
config const n = 10000;

//
// pi, the solar mass (4 * pi squared), and the number of days per year
//
const pi = 3.141592653589793;
const solarMass = 4 * pi**2;
const daysPerYear = 365.24;
//
// One body of the simulated system: a position, a velocity, and a
// mass, where position and velocity are 3-tuples of reals
//
record body {
  var pos, v: 3*real;
  var mass: real;      // does not change after it is set up
}
//
// the array of bodies that we'll be simulating
//
// The sun is listed first and is constructed with only its mass, so
// its pos and v fields keep their default values (initSun() later
// assigns its velocity).  For every other body each velocity component
// is multiplied by daysPerYear and the mass is multiplied by solarMass.
//
var bodies = [/* sun */
new body(mass = solarMass),
/* jupiter */
new body(pos = ( 4.84143144246472090e+00,
-1.16032004402742839e+00,
-1.03622044471123109e-01),
v = ( 1.66007664274403694e-03 * daysPerYear,
7.69901118419740425e-03 * daysPerYear,
-6.90460016972063023e-05 * daysPerYear),
mass = 9.54791938424326609e-04 * solarMass),
/* saturn */
new body(pos = ( 8.34336671824457987e+00,
4.12479856412430479e+00,
-4.03523417114321381e-01),
v = (-2.76742510726862411e-03 * daysPerYear,
4.99852801234917238e-03 * daysPerYear,
2.30417297573763929e-05 * daysPerYear),
mass = 2.85885980666130812e-04 * solarMass),
/* uranus */
new body(pos = ( 1.28943695621391310e+01,
-1.51111514016986312e+01,
-2.23307578892655734e-01),
v = ( 2.96460137564761618e-03 * daysPerYear,
2.37847173959480950e-03 * daysPerYear,
-2.96589568540237556e-05 * daysPerYear),
mass = 4.36624404335156298e-05 * solarMass),
/* neptune */
new body(pos = ( 1.53796971148509165e+01,
-2.59193146099879641e+01,
1.79258772950371181e-01),
v = ( 2.68067772490389322e-03 * daysPerYear,
1.62824170038242295e-03 * daysPerYear,
-9.51592254519715870e-05 * daysPerYear),
mass = 5.15138902046611451e-05 * solarMass)
];
//
// the number of bodies to be simulated, taken from the array itself
// so it tracks the list above
//
const numbodies = bodies.numElements;
//
// Program entry point: set the sun's initial velocity, print the
// starting energy, step the system forward 'n' times with a fixed
// timestep of 0.01, then print the final energy.
//
proc main() {
  const timestep = 0.01;

  initSun();
  writef("%.9r\n", energy());

  for step in 1..n {
    advance(timestep);
  }

  writef("%.9r\n", energy());
}
//
// Set the sun's initial velocity: sum v * mass over all bodies, then
// store the negated sum divided by the solar mass into bodies[1].v
//
proc initSun() {
  const momentum = + reduce (for b in bodies do (b.v * b.mass));
  const sunVelocity = -momentum / solarMass;
  bodies[1].v = sunVelocity;
}
//
// Move the simulation forward by one timestep 'dt': first update the
// velocities of every pair of bodies, then update every position from
// its body's new velocity.
//
proc advance(dt) {
  //
  // adjust the velocities of one pair of bodies, each by the other's
  // mass scaled along the vector between them
  //
  inline proc updateVelocities(ref b1, ref b2) {
    const delta = b1.pos - b2.pos,
          scale = dt / sqrt(sumOfSquares(delta))**3;

    b1.v -= delta * b2.mass * scale;
    b2.v += delta * b1.mass * scale;
  }

  // each unordered pair (i, j) with i < j is handled exactly once
  for i in 1..numbodies do
    for j in i+1..numbodies do
      updateVelocities(bodies[i], bodies[j]);

  for i in 1..numbodies do
    bodies[i].pos += dt * bodies[i].v;
}
//
// Return the energy of the system: for every body add
// 0.5 * mass * |v|^2, and for every pair of bodies subtract the
// product of their masses divided by the distance between them.
//
proc energy() {
  var total = 0.0;

  for i in 1..numbodies {
    const bi = bodies[i];
    const kinetic = 0.5 * bi.mass * sumOfSquares(bi.v);
    total += kinetic;

    for j in i+1..numbodies {
      const bj = bodies[j];
      const distance = sqrt(sumOfSquares(bi.pos - bj.pos));
      total -= (bi.mass * bj.mass) / distance;
    }
  }

  return total;
}
//
// Helper returning the sum of the squares of the three components of
// the 3-tuple 'x'
//
inline proc sumOfSquares(x) {
  const first = x(1)**2,
        second = x(2)**2,
        third = x(3)**2;
  return first + second + third;
}

View File

@@ -0,0 +1,145 @@
//
// An example of a parallel quick sort implementation that uses
// "cobegin" to make each recursive call in parallel and "serial" to
// limit the number of threads.
//
use Random, Time; // for random number generation and the Timer class
var timer: Timer;                // to time the sort

config var n: int = 2**15;       // the size of the array to be sorted
config var thresh: int = 1;      // the recursive depth to serialize
config var verbose: int = 0;     // print out this many elements in array
config var timing: bool = true;  // set timing to false to disable timer

var A: [1..n] real;              // array of real numbers

//
// 'verbose' is user-supplied and may exceed the array size; cap the
// number of elements shown at 'n' so the slice A[1..numShown] below
// always stays within the array's bounds
//
const numShown = min(verbose, n);

//
// initialize array with random numbers
//
fillRandom(A);

//
// print out front of array if verbose flag is set
//
if numShown > 0 then
  writeln("A[1..", numShown, "] = ", A[1..numShown]);

//
// start timer, call parallel quick sort routine, stop timer
//
if timing then timer.start();
pqsort(A, thresh);
if timing then timer.stop();

//
// report sort time
//
if timing then writeln("sorted in ", timer.elapsed(), " seconds");

//
// print out front of array if verbose flag is set
// values should now be in sorted order
//
if numShown > 0 then
  writeln("A[1..", numShown, "] = ", A[1..numShown]);

//
// verify that array is sorted or halt
//
for i in 2..n do
  if A(i) < A(i-1) then
    halt("A(", i-1, ") == ", A(i-1), " > A(", i, ") == ", A(i));

writeln("verification success");
//
// pqsort -- parallel quick sort; sorts arr[low..high] in place
//
//   arr:    generic 1D array of values (real, int, ...)
//   thresh: number of recursive calls to make before serializing
//   low:    first index of the span to sort, defaults to the array's
//           lowest index
//   high:   last index of the span to sort, defaults to the array's
//           highest index
//
proc pqsort(arr: [],
            thresh: int,
            low: int = arr.domain.low,
            high: int = arr.domain.high) where arr.rank == 1 {

  //
  // base case: a short span is handed to bubble sort instead
  //
  const span = high - low;
  if span < 8 {
    bubbleSort(arr, low, high);
    return;
  }

  //
  // pick a pivot value, then partition arr[low..high] around it
  //
  const pivot = findPivot();
  const split = partition(pivot);

  //
  // sort the two sides of the pivot with recursive calls made inside
  // a cobegin; once thresh has dropped to 0 or below, the serial
  // statement makes those calls execute serially
  //
  const runSerially = thresh <= 0;
  serial runSerially do cobegin {
    pqsort(arr, thresh-1, low, split-1);
    pqsort(arr, thresh-1, split+1, high);
  }

  //
  // findPivot -- median-of-3 pivot selection: orders the low, middle
  //              and high elements, moves the middle one to position
  //              high-1, and returns its value
  //
  proc findPivot() {
    const middle = low + (high-low+1) / 2;

    if arr(middle) < arr(low) {
      arr(middle) <=> arr(low);
    }
    if arr(high) < arr(low) {
      arr(high) <=> arr(low);
    }
    if arr(high) < arr(middle) {
      arr(high) <=> arr(middle);
    }

    // park the median at high-1 and hand back a copy of it
    arr(middle) <=> arr(high-1);
    return arr(high-1);
  }

  //
  // partition -- rearranges the span so that values less than the
  //              pivot end up to its left and values greater than it
  //              to its right; returns the pivot's final location
  //
  proc partition(pivotVal) {
    var left = low,
        right = high-1;

    while left < right {
      // step each cursor at least once, then on to the next
      // out-of-place element
      left += 1;
      while arr(left) < pivotVal do left += 1;

      right -= 1;
      while pivotVal < arr(right) do right -= 1;

      if left < right then
        arr(left) <=> arr(right);
    }

    // move the pivot from its parking spot at high-1 to where the
    // cursors met
    arr(high-1) = arr(left);
    arr(left) = pivotVal;
    return left;
  }
}
//
// bubbleSort -- bubble sort for base case of quick sort; sorts
//               arr[low..high] in place into ascending order
//
//   arr:  generic 1D array of values (real, int, ...)
//   low:  lower bound of array to start sort at
//   high: upper bound of array to stop sort at
//
// Each pass leaves the largest remaining value at the end of the
// scanned range, so the range shrinks by one per pass, and the sort
// stops as soon as a pass makes no swap.  An empty or single-element
// span (high <= low) is left untouched.
//
proc bubbleSort(arr: [], low: int, high: int) where arr.rank == 1 {
  var last = high - 1;      // highest j whose pair (j, j+1) is still unsorted

  while last >= low {
    var swapped = false;

    for j in low..last {
      if arr(j) > arr(j+1) {
        arr(j) <=> arr(j+1);
        swapped = true;
      }
    }

    // a pass without swaps means the remaining span is already ordered
    if !swapped then break;

    last -= 1;
  }
}