@inproceedings {INPROC-2018-52,
   author = {Nehzat Emamy and Pascal Litty and Thomas Klotz and Miriam Mehl and Oliver R{\"o}hrle},
   title = {{POD-DEIM model order reduction of the Monodomain Reaction-Diffusion Sub-Model of the Neuro-Muscular System}},
   booktitle = {IUTAM Symposium on Model Order Reduction of Coupled Systems; Stuttgart, Germany, May 22-25, 2018: MORCOS 2018},
   editor = {J. Fehr and B. Haasdonk},
   publisher = {Springer},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--14},
   type = {Conference Paper},
   month = {May},
   year = {2018},
   isbn = {978-3-030-21012-0},
   language = {English},
   cr-category = {I.6.0 Simulation and Modeling General},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Simulation of Large Systems},
   abstract = {We apply POD-DEIM model order reduction to a 0D/1D model used to simulate the
      propagation of action potentials through the myocardium or along skeletal
      muscle fibers. This corresponding system of ODEs (reaction) and PDEs
      (diffusion) is called the monodomain equation. 0D sets of ODEs describing the
      ionic currents flowing across the cell membrane are coupled along muscle fibers
      through a $1$D diffusion process for the transmembrane potential. Due to the
      strong coupling of the transmembrane potential and other state variables
      describing the behavior of the membrane, a total reduction strategy including
      all degrees of freedom turns out to be more efficient than a reduction of only
      the transmembrane potential. The total reduction approach is four orders of
      magnitude more accurate than partial reduction and shows a faster convergence
      in the number of POD modes with respect to the mesh refinement. A speedup of
      $2.7$ is achieved for a 1D mesh with $320$ nodes. Considering the DEIM
      approximation in combination with the total reduction, the nonlinear functions
      corresponding to the ionic state variables are also approximated in addition to
      the nonlinear ionic current in the monodomain equation. We observe that the
      same number of DEIM interpolation points as the number of POD modes is the
      optimal choice regarding stability, accuracy and runtime for the current
      POD-DEIM approach.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-52&engl=1}
}

@inproceedings {INPROC-2018-51,
   author = {Nehzat Emamy and Pascal Litty and Thomas Klotz and Miriam Mehl and Oliver R{\"o}hrle},
   title = {{POD-DEIM model order reduction for the Monodomain reaction-diffusion equation in neuro-muscular system}},
   booktitle = {Proceedings of 6th European Conference on Computational Mechanics (Solids, Structures and Coupled Problems) (ECCM 6) and the 7th European Conference on Computational Fluid Dynamics (ECFD 7); Glasgow, UK, June 11-15, 2018},
   editor = {Roger Owen and Ren{\'e} de Borst and Jason Reese and Chris Pearce},
   publisher = {International Center for Numerical Methods in Engineering (CIMNE)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {2514--2524},
   type = {Conference Paper},
   month = {June},
   year = {2018},
   isbn = {978-84-947311-6-7},
   language = {English},
   cr-category = {A General Literature,
                   G Mathematics of Computing,
                   E Data},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2018-51/INPROC-2018-51.pdf},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Simulation of Large Systems},
   abstract = {We apply the POD-DEIM model order reduction to the propagation of the
      transmembrane potential along $1$D muscle fibers. This propagation is
      represented using the monodomain partial differential equation. The monodomain
      equation, which is a reaction-diffusion equation, is coupled through its
      reaction term with a set of ordinary differential equations, which provide the
      ionic current across the cell membrane. Due to the strong coupling of the
      transmembrane potential and ionic state variables, we reduce them all together
      proposing a total reduction strategy. We compare the current strategy with the
      conventional strategy of reducing the transmembrane potential. Considering the
      current approach, the discrete system matrix is slightly modified to adjust for
      the size. However, size of the precomputed reduced system matrix remains the
      same, which means the same computational cost. The current approach appears to
      be four orders of magnitude more accurate considering the equivalent number of
      modes on the same grid in comparison to the conventional approach. Moreover, it
      shows a faster convergence in the number of POD modes with respect to the grid
      refinement. Using the DEIM approximation of nonlinear functions in combination
      with the total reduction, the nonlinear functions corresponding to the ionic
      state variables are also approximated besides the nonlinear ionic current in
      the monodomain equation. For the current POD-DEIM approach, it appears that the
      same number of DEIM interpolation points as the number of POD modes is the
      optimal choice regarding stability, accuracy and runtime.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-51&engl=1}
}

@comment{NOTE(review) for INPROC-2018-50: pages 1--510 and the doi look like
   whole-volume data rather than data for this chapter (other Springer chapter
   DOIs in this file carry a chapter suffix such as _10). Confirm the chapter
   page range and chapter DOI against the publisher record before citing.}
@inproceedings {INPROC-2018-50,
   author = {Steffen Hirschmann and Michael Lahnert and Carolin Schober and Malte Brunn and Miriam Mehl and Dirk Pfl{\"u}ger},
   title = {{Load-Balancing and Spatial Adaptivity for Coarse-Grained Molecular Dynamics Applications}},
   booktitle = {High Performance Computing in Science and Engineering '18},
   editor = {Wolfgang E. Nagel and Dietmar H. Kr{\"o}ner and Michael M. Resch},
   publisher = {Springer International Publishing},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--510},
   type = {Conference Paper},
   month = {October},
   year = {2018},
   isbn = {978-3-030-13324-5},
   doi = {10.1007/978-3-030-13325-2},
   language = {English},
   cr-category = {G.1.0 Numerical Analysis General},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2018-50/INPROC-2018-50.pdf},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Simulation of Large Systems},
   abstract = {We present our approach for a scalable implementation of coupled soft matter
      simulations for inhomogeneous applications based on the simulation package
      ESPResSo and an extended version of the adaptive grid framework p4est. Our main
      contribution in this paper is the development and implementation of a joint
      partitioning of two or more distinct octree-based grids based on the concept of
      a finest common tree. This concept guarantees that, on all grids, the same
      process is responsible for each point in space and, thus, avoids communication
      of data in overlapping volumes handled in different partitions. We achieve up
      to 85 \% parallel efficiency in a weak scaling setting. Our proposed algorithms
      take only a small fraction of the overall runtime of grid adaption.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-50&engl=1}
}

@inproceedings {INPROC-2018-09,
   author = {Julian Valentin and Dirk Pfl{\"u}ger},
   title = {{Fundamental Splines on Sparse Grids and Their Application to Gradient-Based Optimization}},
   booktitle = {Sparse Grids and Applications - Miami 2016},
   editor = {Jochen Garcke and Dirk Pfl{\"u}ger and Clayton G. Webster and Guannan Zhang},
   publisher = {Springer},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Computational Science and Engineering},
   volume = {123},
   pages = {229--251},
   type = {Conference Paper},
   month = {January},
   year = {2018},
   doi = {10.1007/978-3-319-75426-0_10},
   keywords = {sparse grids; optimization; B-splines},
   language = {English},
   cr-category = {G.1.6 Numerical Analysis Optimization},
   ee = {https://dx.doi.org/10.1007/978-3-319-75426-0_10},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Simulation of Large Systems},
   abstract = {},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-09&engl=1}
}

@inproceedings {INPROC-2018-08,
   author = {David Pfander and Gregor Dai{\ss} and Dirk Pfl{\"u}ger and Dominic Marcello and Hartmut Kaiser},
   title = {{Accelerating Octo-Tiger: Stellar Mergers on Intel Knights Landing with HPX}},
   booktitle = {Proceedings of the 6th International Workshop on OpenCL},
   publisher = {ACM},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--9},
   type = {Conference Paper},
   month = {May},
   year = {2018},
   language = {English},
   cr-category = {D.1 Programming Techniques,
                   D.3.4 Programming Languages Processors,
                   G.4 Mathematical Software},
   contact = {submitted},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Simulation of Large Systems},
   abstract = {The optimization of performance of complex simulation codes with high
      computational demands, such as Octo-Tiger, is an ongoing challenge. Octo-Tiger
      is an astrophysics code simulating the evolution of star systems based on the
      fast multipole method on adaptive octrees. It was implemented using high-level
      C++ libraries, specifically HPX and Vc, which allows its use on different
      hardware platforms. Recently, we have demonstrated excellent scalability in a
      distributed setting. In this paper, we study Octo-Tiger's node-level
      performance on an Intel Knights Landing platform. We focus on the fast
      multipole method, as it is Octo-Tiger's computationally most demanding
      component. By using HPX and a futurization approach, we can efficiently
      traverse the adaptive octrees in parallel. On the core-level, threads process
      sub-grids using multiple 743-element stencils. In numerical experiments,
      simulating the time evolution of a rotating star on an Intel Xeon Phi 7250
      Knights Landing processor, Octo-Tiger shows good parallel efficiency and
      achieves up to 408 GFLOPS. This results in a speedup of 2x compared to a
      24-core Skylake-SP platform, using the same high-level abstractions.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-08&engl=1}
}

@inproceedings {INPROC-2018-07,
   author = {David Pfander and Malte Brunn and Dirk Pfl{\"u}ger},
   title = {{AutoTuneTMP: Auto-Tuning in C++ With Runtime Template Metaprogramming}},
   booktitle = {2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)},
   publisher = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--10},
   type = {Conference Paper},
   month = {May},
   year = {2018},
   keywords = {auto-tuning; template metaprogramming; just-in-time compilation; performance engineering},
   language = {English},
   cr-category = {D.3.4 Programming Languages Processors},
   contact = {submitted},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Simulation of Large Systems},
   abstract = {},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2018-07&engl=1}
}

@article {ART-2018-08,
   author = {Chris P. Bradley and Nehzat Emamy and Thomas Ertl and Dominik G{\"o}ddeke and Andreas Hessenthaler and Thomas Klotz and Aaron Kr{\"a}mer and Michael Krone and Benjamin Maier and Miriam Mehl and Tobias Rau and Oliver R{\"o}hrle},
   title = {{Enabling Detailed, Biophysics-Based Skeletal Muscle Models on HPC Systems}},
   journal = {Frontiers in Physiology},
   publisher = {frontiers},
   volume = {9},
   pages = {816},
   type = {Article in Journal},
   month = {July},
   year = {2018},
   doi = {10.3389/fphys.2018.00816},
   language = {English},
   cr-category = {G.0 Mathematics of Computing General},
   ee = {https://www.frontiersin.org/article/10.3389/fphys.2018.00816},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Simulation of Large Systems},
   abstract = {},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2018-08&engl=1}
}

@article {ART-2018-03,
   author = {Stefanie Stalter and Leonid Yelash and Nehzat Emamy and Antonia Statt and Martin Hanke and Maria Luk{\'a}{\v{c}}ov{\'a}-Medvid'ov{\'a} and Peter Virnau},
   title = {{Molecular dynamics simulations in hybrid particle-continuum schemes: Pitfalls and caveats}},
   journal = {Computer Physics Communications},
   publisher = {Elsevier},
   volume = {224},
   pages = {198--208},
   type = {Article in Journal},
   month = {March},
   year = {2018},
   language = {English},
   cr-category = {I.6.0 Simulation and Modeling General},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Simulation of Large Systems},
   abstract = {Heterogeneous multiscale methods (HMM) combine molecular accuracy of
      particle-based simulations with the computational efficiency of continuum
      descriptions to model flow in soft matter liquids. In these schemes, molecular
      simulations typically pose a computational bottleneck, which we investigate in
      detail in this study. We find that it is preferable to simulate many small
      systems as opposed to a few large systems, and that a choice of a simple
      isokinetic thermostat is typically sufficient while thermostats such as
      Lowe--Andersen allow for simulations at elevated viscosity. We discuss
      suitable choices for time steps and finite-size effects which arise in the
      limit of very small simulation boxes. We also argue that if colloidal systems
      are considered as opposed to atomistic systems, the gap between microscopic and
      macroscopic simulations regarding time and length scales is significantly
      smaller. We propose a novel reduced-order technique for the coupling to the
      macroscopic solver, which allows us to approximate a non-linear stress--strain
      relation efficiently and thus further reduce computational effort of
      microscopic simulations.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2018-03&engl=1}
}

@article {ART-2018-02,
   author = {Julian Valentin and Michael Sprenger and Dirk Pfl{\"u}ger and Oliver R{\"o}hrle},
   title = {{Gradient-Based Optimization with B-Splines on Sparse Grids for Solving Forward-Dynamics Simulations of Three-Dimensional, Continuum-Mechanical Musculoskeletal System Models}},
   journal = {International Journal for Numerical Methods in Biomedical Engineering},
   publisher = {Wiley},
   pages = {1--16},
   type = {Article in Journal},
   month = {January},
   year = {2018},
   doi = {10.1002/cnm.2965},
   language = {English},
   cr-category = {G.1.1 Numerical Analysis Interpolation,
                   G.1.6 Numerical Analysis Optimization},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Simulation of Large Systems},
   abstract = {},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2018-02&engl=1}
}

