[[BR]]
=== Data Communication ===
{{{
RECURSIVE SUBROUTINE AMR(n)
  USE TreeLevelComms
  USE DataLevelComms
  INTEGER :: n, nSteps, step
  INTEGER :: iErr

  IF (n <= 0) nSteps = 1
  IF (n > 0) nSteps = 2
  IF (n > BaseLevel) THEN
    CALL CompRecvGridsFromParents(n)
    CALL SortNodes(n)
    CALL CompSendGridsToChildren(n-1)
    IF (n > -1) THEN
      CALL InitInfos(n)
      IF (n > 0) CALL PostRecvParentsData(n)
    END IF
    CALL PostRecvOverlapsNeighbors(n)
    CALL PostRecvOldNodeOverlaps(n)
    IF (n > 0) THEN
      CALL CompRecvParentsData(n)
      CALL ProlongateParentsData(n)
    END IF
    CALL CompRecvOverlapsNeighbors(n)
    IF (n > -1) CALL ChildMaskOverlaps(n)
    CALL CompRecvOldNodeOverlaps(n)
  END IF

  DO step = 1, nSteps
    levels(n)%CurrentLevelStep = levels(n)%CurrentLevelStep + 1
    levels(n)%step = step

    IF (step == 2) CALL UpdateOverlaps(n)
    IF (n > -1) THEN
      CALL GetLevelLoad(n)
      CALL PostRecvOverlaps(n)
      CALL PostSendOverlaps(n)
      CALL ApplyOverlaps(n,step)
      CALL CompRecvOverlaps(n)
    END IF

    IF (n > 0) CALL AfterOverlaps(n)
    IF (n > -1) THEN
      CALL ParticleUpdate(n)
      CALL ApplyPhysicalBCs(n)
#if defined HYPRE
      CALL ApplyEllipticBC(n)
#endif
    END IF
    IF (n < MaxLevel) THEN
      IF (n > -1) CALL SetErrFlags(n)
      IF (step == 2 .OR. n <= BaseLevel) CALL AgeNodesChildren(n)
      CALL AgeNodes(n+1)
      CALL CreateChildrens(n)
      CALL DistributeChildrens(n)
      CALL PostSendGridsToChildren(n)
      CALL PostRecvGridsFromParents(n+1)
      IF (n > -1) CALL PostSendChildrenData(n)
      CALL PostRecvNeighboringChildren(n)
      CALL PostSendNeighboringChildren(n)
      IF (step == 1 .AND. n > BaseLevel) THEN
        CALL PostRecvOverlappingChildrenFromOldNodes(n)
        CALL PostRecvOverlappingChildrenFromNewNodes(n)
        CALL PostSendOverlappingChildrenToOldNodes(n)
        CALL PostSendOverlappingChildrenToNewNodes(n)
        CALL InheritOldNodeOverlapsChildren(n)
        CALL InheritNewNodeOverlapsChildren(n)
        CALL InheritNeighborsChildren(n)
        CALL CompRecvOverlappingChildrenFromOldNodes(n)
        CALL CompRecvOverlappingChildrenFromNewNodes(n)
        CALL PostSendOverlapsToOldNodesChildren(n)
        CALL CompRecvNeighboringChildren(n)
      ELSE
        CALL InheritOverlapsOldChildren(n)
        CALL InheritNeighborsChildren(n)
        CALL CompRecvNeighboringChildren(n)
        CALL InheritOverlapsNewChildren(n)
        CALL PostSendOverlapsToNodesOldChildren(n)
      END IF
      CALL PostSendOverlapsNeighbors(n)
      IF (n > -1) CALL PostRecvChildrenData(n)
    END IF
    IF (n > -1) CALL BeforeGlobalStep(n)

    IF (n < MaxLevel) THEN
      IF (n > -1) THEN
        CALL ApplyChildrenData(n)
        CALL CompSendChildrenData(n)
      END IF
      CALL CompSendNeighboringChildren(n)
      IF (step == 1 .AND. n > BaseLevel) THEN
        CALL CompSendOverlappingChildrenToOldNodes(n)
        CALL CompSendOverlappingChildrenToNewNodes(n)
        CALL CompSendOverlapsToOldNodesChildren(n)
      ELSE
        CALL CompSendOverlapsToNodesOldChildren(n)
      END IF
      CALL CompSendOverlapsNeighbors(n)
      IF (n > -1) THEN
        CALL CompRecvChildrenData(n)
        CALL CompSendParentsData(n+1)
      END IF
    END IF
    IF (n > -1) THEN
      CALL RestrictionFixups(n)
      CALL AfterFixups(n)
    END IF
    IF (n > -1) THEN
      CALL PostRecvFluxes(n)
      CALL PostSendFluxes(n)
      IF (iThreaded == 0 .AND. n > 0) CALL WaitingAdvances(n)
      CALL PrintAdvance(n)
#if defined HYPRE
      IF (lElliptic) CALL Elliptic(n)
#endif
      IF (n < MaxLevel) CALL UpdateChildMasks(n)
      CALL SyncFluxes(n)
      CALL CompRecvFluxes(n)
    END IF
    IF (n > 0) CALL AccumulateFluxes(n)
    IF (n > -1) THEN
      CALL CompSendOverlaps(n)
      CALL CompSendFluxes(n)
    END IF
    IF (step == 2) CALL NullifyNeighbors(n)
    IF (RestartStep) EXIT
  END DO
  IF (n > 0) THEN
    CALL CoarsenDataForParents(n)
    CALL PostSendParentsData(n)
  END IF

END SUBROUTINE AMR
}}}
[[BR]]
'''''IMPORTANT:''''' {{{AMR()}}} is a recursive algorithm, and many of the communications are inter-level communications. Consequently, a send or receive might be posted on one level and completed on another, which can make the algorithm tricky to follow. If you are a new user stepping through the AMR algorithm for the first time, start your traversal at {{{n = -2}}} and follow along as you add levels.

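To see the pattern, here is a stripped-down skeleton of the recursion. Only the grid-transfer calls from the listing above are kept, everything else is elided, and the recursive call is moved next to them for clarity; it illustrates the send/receive pairing rather than the actual control flow:

{{{
! Illustrative skeleton only -- the calls are taken from the listing above,
! everything else is elided and the ordering is compressed.
RECURSIVE SUBROUTINE AMR(n)
  INTEGER :: n
  IF (n > BaseLevel) THEN
    CALL CompRecvGridsFromParents(n)    ! completes a receive posted by level n-1
    CALL CompSendGridsToChildren(n-1)   ! completes a send posted by level n-1
  END IF
  ! ... advance level n ...
  IF (n < MaxLevel) THEN
    CALL PostSendGridsToChildren(n)     ! posted here on level n ...
    CALL PostRecvGridsFromParents(n+1)  ! ... and here ...
    CALL AMR(n+1)                       ! ... but completed inside the child call above
  END IF
END SUBROUTINE AMR
}}}

Tracing the code one level at a time therefore shows only half of each send/receive pair, which is why it helps to start at {{{n = -2}}} and follow the levels as they are added.
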
[[BR]]
== Round 6: Scheduling and Threading ==

Several attempts have been made to incorporate [http://en.wikipedia.org/wiki/Thread_(computer_science) threads] into AstroBEAR in order to achieve global load balancing. There are still formidable technical issues to overcome with threading, so for the time being AstroBEAR uses a "pseudo-threaded" scheduling approach that mimics threading through careful management of the advances across all levels.

The scheduling code introduces three new subroutines into {{{AMR()}}} (the sketch after the list shows where they sit around the recursive call):

 * {{{ScheduledAdvanceGrids(n)}}} -- Calculates the workload for level {{{n}}}.
 * {{{WaitingAdvances(n)}}} -- Advances grids on level {{{n}}}, and if there is time, advances coarser grids while waiting for the other level-{{{n}}} advances to finish.
 * {{{CompleteAdvanceGrids(n)}}} -- Finishes advancing grids on level {{{n}}}.
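
In the full listing at the bottom of this page these calls bracket the recursive descent into finer levels. Condensed down to just the scheduling-related lines (everything else is elided; see the full listing for the exact placement), the pattern is:

{{{
IF (n > -1 .AND. iThreaded == 0) CALL ScheduledAdvanceGrids(n)  ! work out level n's load
IF (n < MaxLevel) CALL AMR(n+1)                                 ! descend into the finer level
IF (n > -1 .AND. iThreaded == 0) CALL CompleteAdvanceGrids(n)   ! finish advancing level n's grids
! ... later, while level n's flux exchange is in flight ...
IF (iThreaded == 0 .AND. n > 0) CALL WaitingAdvances(n)         ! advance level n, using any wait
                                                                ! time for coarser-level work
}}}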

The main advantage of the scheduling approach is that it requires no external libraries. Implementing threads in a Fortran-based code like AstroBEAR requires specialized libraries or wrappers for POSIX threads, and since these are not available on every cluster, we would have to build additional libraries on any machine where we wanted to run AstroBEAR.

We are considering two possible approaches for including threads in AstroBEAR:
 * Use threads to make the advance step of each level independent. Higher-level threads will need higher priorities, since their data is required to finish the lower-level steps.
 * Assign ''all'' of a level's operations to a thread. This approach would be promising, but it requires threads to communicate with other threads on different processors. This is a risky proposition, as older versions of MPI are not thread-safe (see the sketch below).
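
The second approach is only viable if the MPI library is initialized with, and actually provides, full thread support. As a hypothetical illustration (this check is not part of AstroBEAR), the requested and provided support levels can be compared at startup:

{{{
PROGRAM CheckMPIThreadSupport
  ! Illustration only -- not AstroBEAR code.
  USE mpi
  IMPLICIT NONE
  INTEGER :: provided, iErr

  ! Request MPI_THREAD_MULTIPLE: any thread may make MPI calls at any time.
  CALL MPI_INIT_THREAD(MPI_THREAD_MULTIPLE, provided, iErr)

  ! Older or poorly configured MPI builds return a weaker level such as
  ! MPI_THREAD_SINGLE or MPI_THREAD_FUNNELED, in which case per-level threads
  ! could not safely post their own sends and receives.
  IF (provided < MPI_THREAD_MULTIPLE) THEN
     PRINT *, 'Full MPI thread support not available; provided level = ', provided
  END IF

  CALL MPI_FINALIZE(iErr)
END PROGRAM CheckMPIThreadSupport
}}}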

Threads introduce three new subroutines into {{{AMR()}}} (their placement is condensed after the list):

 * {{{ThreadsInit()}}} -- Initializes thread variables and the threading environment.
 * {{{LaunchAdvanceThread(n)}}} -- Creates a new thread for level {{{n}}}.
 * {{{JoinAdvanceThread(n)}}} -- Rejoins the level-{{{n}}} thread with the main program after it has finished its advance.
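
Their placement mirrors the scheduled version: the launch and join bracket the recursive descent. Condensed from the {{{PTHREADS}}} branch of the full listing below ({{{ThreadsInit()}}} sets up the threading environment beforehand and does not appear in this excerpt):

{{{
#if defined PTHREADS
IF (n > -1) CALL LaunchAdvanceThread(n)   ! spawn a thread to advance level n
IF (n < MaxLevel) CALL AMR(n+1)           ! the main thread keeps descending to finer levels
IF (n > -1) CALL JoinAdvanceThread(n)     ! rejoin once level n's advance has finished
#endif
}}}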

The final iteration of the AMR algorithm (minus the various {{{Timer()}}} calls) looks like this:

{{{
RECURSIVE SUBROUTINE AMR(n)
  USE TreeLevelComms
  USE DataLevelComms
  INTEGER :: n, nSteps, step
  INTEGER :: iErr

  IF (n <= 0) nSteps = 1
  IF (n > 0) nSteps = 2
  IF (n > BaseLevel) THEN
    CALL CompRecvGridsFromParents(n)
    CALL SortNodes(n)
    CALL CompSendGridsToChildren(n-1)
    IF (n > -1) THEN
      CALL InitInfos(n)
      IF (n > 0) CALL PostRecvParentsData(n)
    END IF
    CALL PostRecvOverlapsNeighbors(n)
    CALL PostRecvOldNodeOverlaps(n)
    IF (n > 0) THEN
      CALL CompRecvParentsData(n)
      CALL ProlongateParentsData(n)
    END IF
    CALL CompRecvOverlapsNeighbors(n)
    IF (n > -1) CALL ChildMaskOverlaps(n)
    CALL CompRecvOldNodeOverlaps(n)
  END IF

  DO step = 1, nSteps
    levels(n)%CurrentLevelStep = levels(n)%CurrentLevelStep + 1
    levels(n)%step = step

    IF (step == 2) CALL UpdateOverlaps(n)
    IF (n > -1) THEN
      CALL GetLevelLoad(n)
      CALL PostRecvOverlaps(n)
      CALL PostSendOverlaps(n)
      CALL ApplyOverlaps(n,step)
      CALL CompRecvOverlaps(n)
    END IF

    IF (n > 0) CALL AfterOverlaps(n)
    IF (n > -1) THEN
      CALL ParticleUpdate(n)
      CALL ApplyPhysicalBCs(n)
#if defined HYPRE
      CALL ApplyEllipticBC(n)
#endif
    END IF
    IF (n < MaxLevel) THEN
      IF (n > -1) CALL SetErrFlags(n)
      IF (step == 2 .OR. n <= BaseLevel) CALL AgeNodesChildren(n)
      CALL AgeNodes(n+1)
      CALL CreateChildrens(n)
      CALL DistributeChildrens(n)
      CALL PostSendGridsToChildren(n)
      CALL PostRecvGridsFromParents(n+1)
      IF (n > -1) CALL PostSendChildrenData(n)
      CALL PostRecvNeighboringChildren(n)
      CALL PostSendNeighboringChildren(n)
      IF (step == 1 .AND. n > BaseLevel) THEN
        CALL PostRecvOverlappingChildrenFromOldNodes(n)
        CALL PostRecvOverlappingChildrenFromNewNodes(n)
        CALL PostSendOverlappingChildrenToOldNodes(n)
        CALL PostSendOverlappingChildrenToNewNodes(n)
        CALL InheritOldNodeOverlapsChildren(n)
        CALL InheritNewNodeOverlapsChildren(n)
        CALL InheritNeighborsChildren(n)
        CALL CompRecvOverlappingChildrenFromOldNodes(n)
        CALL CompRecvOverlappingChildrenFromNewNodes(n)
        CALL PostSendOverlapsToOldNodesChildren(n)
        CALL CompRecvNeighboringChildren(n)
      ELSE
        CALL InheritOverlapsOldChildren(n)
        CALL InheritNeighborsChildren(n)
        CALL CompRecvNeighboringChildren(n)
        CALL InheritOverlapsNewChildren(n)
        CALL PostSendOverlapsToNodesOldChildren(n)
      END IF
      CALL PostSendOverlapsNeighbors(n)
      IF (n > -1) CALL PostRecvChildrenData(n)
    END IF
    IF (n > -1) CALL BeforeGlobalStep(n)

    !-------------------------------- Threading options -------------------------------------

    !Option 1: Just create threads to do the advancing on each level
    IF (iThreaded <= 0) THEN
      IF (n > -1 .AND. iThreaded == 0) CALL ScheduledAdvanceGrids(n)
      IF (n < MaxLevel) CALL AMR(n+1)
      IF (n > -1) THEN
        IF (iThreaded == 0) THEN
          CALL CompleteAdvanceGrids(n)
        ELSE
          CALL AdvanceGrids(n)
        END IF
      END IF
#if defined PTHREADS
    ELSEIF (iThreaded > 0) THEN
      IF (n > -1) CALL LaunchAdvanceThread(n)
      IF (n < MaxLevel) CALL AMR(n+1)
      IF (n > -1) CALL JoinAdvanceThread(n)
#endif
    END IF
    !---------------------------- End threading options --------------------------------------

    IF (n < MaxLevel) THEN
      IF (n > -1) THEN
        CALL ApplyChildrenData(n)
        CALL CompSendChildrenData(n)
      END IF
      CALL CompSendNeighboringChildren(n)
      IF (step == 1 .AND. n > BaseLevel) THEN
        CALL CompSendOverlappingChildrenToOldNodes(n)
        CALL CompSendOverlappingChildrenToNewNodes(n)
        CALL CompSendOverlapsToOldNodesChildren(n)
      ELSE
        CALL CompSendOverlapsToNodesOldChildren(n)
      END IF
      CALL CompSendOverlapsNeighbors(n)
      IF (n > -1) THEN
        CALL CompRecvChildrenData(n)
        CALL CompSendParentsData(n+1)
      END IF
    END IF
    IF (n > -1) THEN
      CALL RestrictionFixups(n)
      CALL AfterFixups(n)
    END IF
    IF (n > -1) THEN
      CALL PostRecvFluxes(n)
      CALL PostSendFluxes(n)
      IF (iThreaded == 0 .AND. n > 0) CALL WaitingAdvances(n)
      CALL PrintAdvance(n)
#if defined HYPRE
      IF (lElliptic) CALL Elliptic(n)
#endif
      IF (n < MaxLevel) CALL UpdateChildMasks(n)
      CALL SyncFluxes(n)
      CALL CompRecvFluxes(n)
    END IF
    IF (n > 0) CALL AccumulateFluxes(n)
    IF (n > -1) THEN
      CALL CompSendOverlaps(n)
      CALL CompSendFluxes(n)
    END IF
    IF (step == 2) CALL NullifyNeighbors(n)
    IF (RestartStep) EXIT
  END DO
  IF (n > 0) THEN
    CALL CoarsenDataForParents(n)
    CALL PostSendParentsData(n)
  END IF

END SUBROUTINE AMR
}}}