Changes between Version 12 and Version 13 of AstroBearAmr


Timestamp: 05/23/11 17:47:55
Author: Brandon Shroyer

  • AstroBearAmr

    v12 v13  
    325325
    326326[[BR]]
    327 == Communication ==
     327== Round 5: Communication ==
     328
     329At this point, the basic AMR algorithm has been assembled, and we turn our attention to the code used to run AstroBEAR on more than one processor.  While much of this code is invisible at the {{{AMR()}}} level, there are a few high-level routines included to manage large groups of messages.
    328330
    329331Communication in AstroBEAR comes in two varieties: ''data'' communication, which is concerned with passing actual grid data between processors, and ''tree'' communication, which passes AMR tree data.
    330332
    331 [[BR]]
    332 === Data Communication ===
     333{{{
     334   RECURSIVE SUBROUTINE AMR(n)
     335     USE TreeLevelComms
     336     USE DataLevelComms
     337     INTEGER :: n, nSteps, step
     338     INTEGER :: iErr
     339
     340
     341     IF (n <= 0) nSteps=1
     342     IF (n >  0) nSteps = 2
     343     IF (n > BaseLevel) THEN
     344        CALL CompRecvGridsFromParents(n)
     345        CALL SortNodes(n)
     346        CALL CompSendGridsToChildren(n-1)         
     347        IF (n > -1) THEN
     348           CALL InitInfos(n)
     349           IF (n > 0)  CALL PostRecvParentsData(n)
     350        END IF
     351        CALL PostRecvOverlapsNeighbors(n)         
     352        CALL PostRecvOldNodeOverlaps(n)
     353        IF (n > 0) THEN
     354           CALL CompRecvParentsData(n)         
     355           CALL ProlongateParentsData(n)     
     356        END IF
     357        CALL CompRecvOverlapsNeighbors(n)       
     358        IF (n > -1)  CALL ChildMaskOverlaps(n)
     359        CALL CompRecvOldNodeOverlaps(n)       
     360     END IF
     361
     362     DO step=1,nSteps
     363        levels(n)%CurrentLevelStep=levels(n)%CurrentLevelStep+1
     364        levels(n)%step=step
     365
     366        IF (step == 2) CALL UpdateOverlaps(n)
     367        IF (n > -1) THEN
     368           CALL GetLevelLoad(n)         
     369           CALL PostRecvOverlaps(n)       
     370           CALL PostSendOverlaps(n)       
     371           CALL ApplyOverlaps(n,step)
     372           CALL CompRecvOverlaps(n)                   
     373        END IF
     374
     375        IF (n > 0)  CALL AfterOverlaps(n)
     376        IF (n > -1) THEN
     377           CALL ParticleUpdate(n)
     378           CALL ApplyPhysicalBCs(n)
     379# if defined HYPRE
     380        CALL ApplyEllipticBC(n)
     381# endif
     382        END IF
     383        IF (n < MaxLevel) THEN
     384           IF (n > -1)  CALL SetErrFlags(n)
     385           IF (step == 2 .OR. n <= BaseLevel)  CALL AgeNodesChildren(n)
     386           CALL AgeNodes(n+1)
     387           CALL CreateChildrens(n)
     388           CALL DistributeChildrens(n)
     389           CALL PostSendGridsToChildren(n)         
     390           CALL PostRecvGridsFromParents(n+1)
     391           IF (n > -1)  CALL PostSendChildrenData(n)
     392           CALL PostRecvNeighboringChildren(n)
     393           CALL PostSendNeighboringChildren(n)
     394           IF (step == 1 .AND. n > BaseLevel) THEN
     395              CALL PostRecvOverlappingChildrenFromOldNodes(n) 
     396              CALL PostRecvOverlappingChildrenFromNewNodes(n)
     397              CALL PostSendOverlappingChildrenToOldNodes(n)   
     398              CALL PostSendOverlappingChildrenToNewNodes(n)
     399              CALL InheritOldNodeOverlapsChildren(n)
     400              CALL InheritNewNodeOverlapsChildren(n)
     401              CALL InheritNeighborsChildren(n)
     402              CALL CompRecvOverlappingChildrenFromOldNodes(n)   
     403              CALL CompRecvOverlappingChildrenFromNewNodes(n)   
     404              CALL PostSendOverlapsToOldNodesChildren(n)
     405              CALL CompRecvNeighboringChildren(n)   
     406           ELSE
     407              CALL InheritOverlapsOldChildren(n)
     408              CALL InheritNeighborsChildren(n)
     409              CALL CompRecvNeighboringChildren(n)   
     410              CALL InheritOverlapsNewChildren(n)
     411              CALL PostSendOverlapsToNodesOldChildren(n)       
     412           END IF
     413           CALL PostSendOverlapsNeighbors(n)         
     414           IF (n > -1)  CALL PostRecvChildrenData(n)         
     415        END IF
     416        IF (n > -1)  CALL BeforeGlobalStep(n)
     417
     418        IF (n < MaxLevel) THEN
     419
     420           IF (n > -1) THEN
     421              CALL ApplyChildrenData(n)
     422              CALL CompSendChildrenData(n)         
     423           END IF
     424           CALL CompSendNeighboringChildren(n)     
     425           IF (step == 1 .AND. n > BaseLevel) THEN
     426              CALL CompSendOverlappingChildrenToOldNodes(n)
     427              CALL CompSendOverlappingChildrenToNewNodes(n)
     428              CALL CompSendOverlapsToOldNodesChildren(n)       
     429           ELSE
     430              CALL CompSendOverlapsToNodesOldChildren(n)       
     431           END IF
     432           CALL CompSendOverlapsNeighbors(n)
     433           IF (n > -1) THEN
     434              CALL CompRecvChildrenData(n)         
     435              CALL CompSendParentsData(n+1)         
     436           END IF
     437        END IF
     438        IF (n > -1) THEN
     439           CALL RestrictionFixups(n)
     440           CALL AfterFixups(n)
     441        END IF
     442        IF (n > -1) THEN
     443           CALL PostRecvFluxes(n)         
     444           CALL PostSendFluxes(n)     
     445           IF (iThreaded == 0 .AND. n > 0)  CALL WaitingAdvances(n)
     446           CALL PrintAdvance(n)           
     447#if defined HYPRE                                           
     448           IF (lElliptic)  CALL Elliptic(n)
     449#endif
     450           IF (n < MaxLevel)  CALL UpdateChildMasks(n)
     451           CALL SyncFluxes(n)
     452           CALL CompRecvFluxes(n) 
     453        END IF
     454        IF (n > 0)  CALL AccumulateFluxes(n)
     455        IF (n > -1) THEN
     456           CALL CompSendOverlaps(n)         
     457           CALL CompSendFluxes(n)         
     458        END IF
     459        IF (step == 2)  CALL NullifyNeighbors(n)
     460        IF (RestartStep) EXIT
     461     END DO
     462     IF (n > 0) THEN
     463        CALL CoarsenDataForParents(n)
     464        CALL PostSendParentsData(n)                   
     465     END IF
     466
     467   END SUBROUTINE AMR
     468
     469}}}
     470[[BR]]
     471==== Data Communication ====
    333472There are four basic data routines that involve sharing of data between grids:
    334473 * {{{ProlongateParentsData()}}} -- Parents to children (Inter-Level)
     
    355494
    356495[[BR]]
    357 === Tree Communication ===
     496==== Tree Communication ====
    358497
    359498There are five tree operations that require some communication between processors:
     
    367506As with the data operations, each of these requires four communication calls ({{{PostSend}}}, {{{PostRecv}}}, {{{CompSend}}}, {{{CompRecv}}}) in order to overlap computation with communication.  In all of these cases, it is the node's children that are being communicated.
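
To make the Post/Comp pattern concrete, below is a minimal sketch of what one send pair might look like using standard non-blocking MPI calls.  This is illustrative only -- the routine and variable names ({{{PostSendExample}}}, {{{CompSendExample}}}, {{{buffer}}}, {{{neighbor}}}) are placeholders rather than actual AstroBEAR identifiers.

{{{
! Illustrative sketch of the Post*/Comp* idiom (not AstroBEAR code).
SUBROUTINE PostSendExample(buffer, count, neighbor, tag, request)
   USE mpi
   INTEGER, INTENT(IN) :: count, neighbor, tag
   REAL, INTENT(IN) :: buffer(count)
   INTEGER, INTENT(OUT) :: request
   INTEGER :: iErr
   ! "Post" starts the transfer and returns immediately, so the level can
   ! keep computing while the message is in flight.
   CALL MPI_ISEND(buffer, count, MPI_REAL, neighbor, tag, &
                  MPI_COMM_WORLD, request, iErr)
END SUBROUTINE PostSendExample

SUBROUTINE CompSendExample(request)
   USE mpi
   INTEGER, INTENT(INOUT) :: request
   INTEGER :: status(MPI_STATUS_SIZE), iErr
   ! "Comp" blocks until the posted transfer has finished, after which the
   ! send buffer can safely be reused.
   CALL MPI_WAIT(request, status, iErr)
END SUBROUTINE CompSendExample
}}}

The receive side follows the same shape, with {{{MPI_IRECV}}} in the Post routine and {{{MPI_WAIT}}} in the Comp routine.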
    368507
    369 [[BR]]
    370 == Threading ==
    371 
    372 
    373 There are several threading options for parallelizing the hydro advance across levels.  There are currently three basic approaches to address this
    374  * Threading the Advances - The advancing of each level can be done independently although higher level threads should have higher priorities
    375  * Threading the AMR levels - Each AMR level can also be thought of as an independent thread.  Unfortunately this approach requires threads to communicate with other threads on different processors.  This requires MPI to be extremely thread safe
    376  * !PseudoThreading - This is essentially careful scheduling of the advances to try and mimic the switching that would occur under a threaded implementation.  This has the advance of not requiring any external libraries.
     508'''''IMPORTANT:'''''  {{{AMR()}}} is a recursive algorithm, and many of the communications are inter-level communications.  Consequently, a send or receive might be posted on one level and completed on another, which can make the algorithm tricky to follow.  If you are a new user stepping through the AMR algorithm for the first time, start your traversal at {{{n = -2}}} and follow along as you add levels.
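
As a reading aid, here is a schematic of the recursion skeleton only.  It is not AstroBEAR code -- {{{AMR_sketch}}} is a placeholder name, and the full routine appears at the end of this section -- but it shows why a message posted during one level's call can be completed during another's.

{{{
! Schematic only: each level completes receives that its parent posted for
! it, posts its own communications, recurses into the finer level, and
! completes its own communications afterwards.
RECURSIVE SUBROUTINE AMR_sketch(n)
   INTEGER :: n
   ! ... complete receives posted for this level by the parent's call ...
   ! ... post this level's sends and receives ...
   IF (n < MaxLevel) CALL AMR_sketch(n+1)   ! finer levels advance here
   ! ... complete this level's sends and receives ...
END SUBROUTINE AMR_sketch
}}}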
     509
     510[[BR]]
     511== Round 6:  Scheduling and Threading ==
     512
     513Several attempts have been made to incorporate [http://en.wikipedia.org/wiki/Thread_(computer_science) threads] into AstroBEAR in order to achieve global load balancing.  There are still formidable technical issues to overcome with threading, so for the time being AstroBEAR uses a "pseudo-threaded" scheduling approach that mimics threading through careful management of the advances across all levels.
     514
     515The scheduling code introduces three new subroutines into {{{AMR()}}}:
     516
     517 * {{{ScheduledAdvanceGrids(n)}}} -- Calculates the workload for level {{{n}}}.
     518 * {{{WaitingAdvances(n)}}} -- Advances grids on level {{{n}}} and, if there is time, advances coarser grids while waiting for the other level-{{{n}}} advances to finish.
     519 * {{{CompleteAdvanceGrids(n)}}} -- Finishes advancing grids on level {{{n}}}.
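
The placement of these calls can be seen in the full listing at the end of this section; condensed to just the {{{iThreaded == 0}}} path, the scheduled advance looks like this:

{{{
! Condensed from the full AMR() listing below (pseudo-threaded path only).
IF (n > -1 .AND. iThreaded == 0)  CALL ScheduledAdvanceGrids(n)   ! size up the level-n workload
IF (n < MaxLevel)                 CALL AMR(n+1)                   ! finer levels advance first
IF (n > -1 .AND. iThreaded == 0)  CALL CompleteAdvanceGrids(n)    ! finish the level-n advances
! ... (intervening communication and fixup calls omitted) ...
IF (iThreaded == 0 .AND. n > 0)   CALL WaitingAdvances(n)         ! advance while waiting on the rest of level n
}}}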
     520
     521The main advantage of the scheduling approach is that it requires no external libraries.  Implementing threads in a Fortran-based code like AstroBEAR requires specialized libraries or wrappers for POSIX threads, and these are not available on all clusters, so we would have to install them on any machine where we wanted to run AstroBEAR.
     522
     523We are considering two possible approaches for including threads in AstroBEAR:
     524 * Use threads to make the advance step of each level independent.  Higher-level threads will need higher priorities, since their data is required to finish the lower-level steps.
     525 * Assign ''all'' of a level's operations to a thread.  This approach would be promising, but it requires threads to communicate with other threads on different processors.  This is a risky proposition, as older versions of MPI are not thread-safe.
     526
     527Threads introduce three new subroutines into {{{AMR()}}}:
     528
     529 * {{{ThreadsInit()}}} -- Initializes thread variables and the threading environment.
     530 * {{{LaunchAdvanceThread(n)}}} -- Creates a new thread for level {{{n}}}.
     531 * {{{JoinAdvanceThread(n)}}} -- Rejoins the level-{{{n}}} thread with the main program after it has finished its advance.
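
The corresponding {{{PTHREADS}}} branch in the full listing below is simply:

{{{
! Condensed from the full AMR() listing below (iThreaded > 0, PTHREADS build).
IF (n > -1)       CALL LaunchAdvanceThread(n)   ! spawn the level-n advance thread
IF (n < MaxLevel) CALL AMR(n+1)                 ! recurse while the thread runs
IF (n > -1)       CALL JoinAdvanceThread(n)     ! wait for the level-n advance to finish
}}}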
     532
    377533For more information on threading, see the [ScramblerThreading Scrambler Threading page].
     534
     535
     536The final iteration of the AMR algorithm (minus the various {{{Timer()}}} calls) looks like this:
     537
     538{{{
     539   RECURSIVE SUBROUTINE AMR(n)
     540     USE TreeLevelComms
     541     USE DataLevelComms
     542     INTEGER :: n, nSteps, step
     543     INTEGER :: iErr
     544
     545
     546     IF (n <= 0) nSteps=1
     547     IF (n >  0) nSteps = 2
     548     IF (n > BaseLevel) THEN
     549        CALL CompRecvGridsFromParents(n)
     550        CALL SortNodes(n)
     551        CALL CompSendGridsToChildren(n-1)         
     552        IF (n > -1) THEN
     553           CALL InitInfos(n)
     554           IF (n > 0)  CALL PostRecvParentsData(n)
     555        END IF
     556        CALL PostRecvOverlapsNeighbors(n)         
     557        CALL PostRecvOldNodeOverlaps(n)
     558        IF (n > 0) THEN
     559           CALL CompRecvParentsData(n)         
     560           CALL ProlongateParentsData(n)     
     561        END IF
     562        CALL CompRecvOverlapsNeighbors(n)       
     563        IF (n > -1)  CALL ChildMaskOverlaps(n)
     564        CALL CompRecvOldNodeOverlaps(n)       
     565     END IF
     566
     567     DO step=1,nSteps
     568        levels(n)%CurrentLevelStep=levels(n)%CurrentLevelStep+1
     569        levels(n)%step=step
     570
     571        IF (step == 2) CALL UpdateOverlaps(n)
     572        IF (n > -1) THEN
     573           CALL GetLevelLoad(n)         
     574           CALL PostRecvOverlaps(n)       
     575           CALL PostSendOverlaps(n)       
     576           CALL ApplyOverlaps(n,step)
     577           CALL CompRecvOverlaps(n)                   
     578        END IF
     579
     580        IF (n > 0)  CALL AfterOverlaps(n)
     581        IF (n > -1) THEN
     582           CALL ParticleUpdate(n)
     583           CALL ApplyPhysicalBCs(n)
     584# if defined HYPRE
     585        CALL ApplyEllipticBC(n)
     586# endif
     587        END IF
     588        IF (n < MaxLevel) THEN
     589           IF (n > -1)  CALL SetErrFlags(n)
     590           IF (step == 2 .OR. n <= BaseLevel)  CALL AgeNodesChildren(n)
     591           CALL AgeNodes(n+1)
     592           CALL CreateChildrens(n)
     593           CALL DistributeChildrens(n)
     594           CALL PostSendGridsToChildren(n)         
     595           CALL PostRecvGridsFromParents(n+1)
     596           IF (n > -1)  CALL PostSendChildrenData(n)
     597           CALL PostRecvNeighboringChildren(n)
     598           CALL PostSendNeighboringChildren(n)
     599           IF (step == 1 .AND. n > BaseLevel) THEN
     600              CALL PostRecvOverlappingChildrenFromOldNodes(n) 
     601              CALL PostRecvOverlappingChildrenFromNewNodes(n)
     602              CALL PostSendOverlappingChildrenToOldNodes(n)   
     603              CALL PostSendOverlappingChildrenToNewNodes(n)
     604              CALL InheritOldNodeOverlapsChildren(n)
     605              CALL InheritNewNodeOverlapsChildren(n)
     606              CALL InheritNeighborsChildren(n)
     607              CALL CompRecvOverlappingChildrenFromOldNodes(n)   
     608              CALL CompRecvOverlappingChildrenFromNewNodes(n)   
     609              CALL PostSendOverlapsToOldNodesChildren(n)
     610              CALL CompRecvNeighboringChildren(n)   
     611           ELSE
     612              CALL InheritOverlapsOldChildren(n)
     613              CALL InheritNeighborsChildren(n)
     614              CALL CompRecvNeighboringChildren(n)   
     615              CALL InheritOverlapsNewChildren(n)
     616              CALL PostSendOverlapsToNodesOldChildren(n)       
     617           END IF
     618           CALL PostSendOverlapsNeighbors(n)         
     619           IF (n > -1)  CALL PostRecvChildrenData(n)         
     620        END IF
     621        IF (n > -1)  CALL BeforeGlobalStep(n)
     622
     623        !-------------------------------- Threading options -------------------------------------
     624
     625        !Option 1:  Just create threads to do the advancing on each level
     626        IF (iThreaded <= 0) THEN
     627           IF (n > -1 .AND. iThreaded == 0)  CALL ScheduledAdvanceGrids(n)
     628           IF (n < MaxLevel) CALL AMR(n+1)
     629           IF (n > -1) THEN
     630              IF (iThreaded == 0) THEN                 
     631                 CALL CompleteAdvanceGrids(n)
     632              ELSE
     633                 CALL AdvanceGrids(n)
     634              END IF
     635           END IF
     636#if defined PTHREADS
     637        ELSEIF (iThreaded > 0) THEN           
     638           IF (n > -1)  CALL LaunchAdvanceThread(n)
     639           IF (n < MaxLevel) CALL AMR(n+1)
     640           IF (n > -1)  CALL JoinAdvanceThread(n)
     641# endif
     642        END IF
     643        ! ---------------------------- End threading options --------------------------------------
     644
     645        IF (n < MaxLevel) THEN
     646
     647           IF (n > -1) THEN
     648              CALL ApplyChildrenData(n)
     649              CALL CompSendChildrenData(n)         
     650           END IF
     651           CALL CompSendNeighboringChildren(n)     
     652           IF (step == 1 .AND. n > BaseLevel) THEN
     653              CALL CompSendOverlappingChildrenToOldNodes(n)
     654              CALL CompSendOverlappingChildrenToNewNodes(n)
     655              CALL CompSendOverlapsToOldNodesChildren(n)       
     656           ELSE
     657              CALL CompSendOverlapsToNodesOldChildren(n)       
     658           END IF
     659           CALL CompSendOverlapsNeighbors(n)
     660           IF (n > -1) THEN
     661              CALL CompRecvChildrenData(n)         
     662              CALL CompSendParentsData(n+1)         
     663           END IF
     664        END IF
     665        IF (n > -1) THEN
     666           CALL RestrictionFixups(n)
     667           CALL AfterFixups(n)
     668        END IF
     669        IF (n > -1) THEN
     670           CALL PostRecvFluxes(n)         
     671           CALL PostSendFluxes(n)     
     672           IF (iThreaded == 0 .AND. n > 0)  CALL WaitingAdvances(n)
     673           CALL PrintAdvance(n)           
     674#if defined HYPRE                                           
     675           IF (lElliptic)  CALL Elliptic(n)
     676#endif
     677           IF (n < MaxLevel)  CALL UpdateChildMasks(n)
     678           CALL SyncFluxes(n)
     679           CALL CompRecvFluxes(n) 
     680        END IF
     681        IF (n > 0)  CALL AccumulateFluxes(n)
     682        IF (n > -1) THEN
     683           CALL CompSendOverlaps(n)         
     684           CALL CompSendFluxes(n)         
     685        END IF
     686        IF (step == 2)  CALL NullifyNeighbors(n)
     687        IF (RestartStep) EXIT
     688     END DO
     689     IF (n > 0) THEN
     690        CALL CoarsenDataForParents(n)
     691        CALL PostSendParentsData(n)                   
     692     END IF
     693
     694   END SUBROUTINE AMR
     695}}}