@inproceedings {INPROC-2019-32,
   author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
   title = {{Data-Driven Fault Diagnosis in End-of-Line Testing of Complex Products}},
   booktitle = {Proceedings of the 6th IEEE International Conference on Data Science and Advanced Analytics (DSAA 2019), Washington, D.C., USA},
   publisher = {IEEE Xplore},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = {October},
   year = {2019},
   keywords = {decision support; classification; ensembles; automotive; fault diagnosis; quality management; sampling},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Machine learning approaches may support various use cases in the manufacturing
      industry. However, these approaches often do not address the inherent
      characteristics of the real manufacturing data at hand. In fact, real data
      impose analytical challenges that have a strong influence on the performance
      and suitability of machine learning methods. This paper considers such a
      challenging use case in the area of End-of-Line testing, i.e., the final
      functional check of complex products after the whole assembly line. Here,
      classification approaches may be used to support quality engineers in
      identifying faulty components of defective products. For this, we discuss
      relevant data sources and their characteristics, and we derive the resulting
      analytical challenges. We have identified a set of sophisticated data-driven
      methods that may be suitable to our use case at first glance, e.g., methods
      based on ensemble learning or sampling. The major contribution of this paper is
      a thorough comparative study of these methods to identify whether they are able
      to cope with the analytical challenges. This comprises the discussion of both
      fundamental theoretical aspects and major results of detailed experiments we
      have performed on the real data of our use case.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-32&engl=1}
}

@inproceedings {INPROC-2019-20,
   author = {Manuel Fritz and Holger Schwarz},
   title = {{Initializing k-Means Efficiently: Benefits for Exploratory Cluster Analysis}},
   booktitle = {On the Move to Meaningful Internet Systems: OTM 2019 Conferences},
   editor = {Herv{\'e} Panetto and Christophe Debruyne and Martin Hepp and Dave Lewis and Claudio Agostino Ardagna and Robert Meersman},
   publisher = {Springer Nature Switzerland AG},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Computer Science (LNCS)},
   volume = {11877},
   pages = {146--163},
   type = {Conference Paper},
   month = {January},
   year = {2019},
   isbn = {978-3-030-33245-7},
   doi = {10.1007/978-3-030-33246-4_9},
   keywords = {Exploratory cluster analysis; k-Means; Initialization},
   language = {English},
   cr-category = {E.0 Data General,
                   H.2.8 Database Applications,
                   H.3.3 Information Search and Retrieval},
   ee = {https://link.springer.com/chapter/10.1007/978-3-030-33246-4_9},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Data analysis is a highly exploratory task, where various algorithms with
      different parameters are executed until a solid result is achieved. This is
      especially evident for cluster analyses, where the number of clusters must be
      provided prior to the execution of the clustering algorithm. Since this number
      is rarely known in advance, the algorithm is typically executed several times
      with varying parameters. Hence, the duration of the exploratory analysis
      heavily dependends on the runtime of each execution of the clustering
      algorithm. While previous work shows that the initialization of clustering
      algorithms is crucial for fast and solid results, it solely focuses on a single
      execution of the clustering algorithm and thereby neglects previous executions.
      We propose Delta Initialization as an initialization strategy for k-Means in
      such an exploratory setting. The core idea of this new algorithm is to exploit
      the clustering results of previous executions in order to enhance the
      initialization of subsequent executions. We show that this algorithm is well
      suited for exploratory cluster analysis as considerable speedups can be
      achieved while additionally achieving superior clustering results compared to
      state-of-the-art initialization strategies.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-20&engl=1}
}

@inproceedings {INPROC-2019-19,
   author = {Christoph Stach},
   title = {{VAULT: A Privacy Approach towards High-Utility Time Series Data}},
   booktitle = {Proceedings of the Thirteenth International Conference on Emerging Security Information, Systems and Technologies: SECURWARE 2019},
   publisher = {IARIA},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--6},
   type = {Conference Paper},
   month = {October},
   year = {2019},
   keywords = {Privacy; Time Series; Projection; Selection; Aggregation; Interpolation; Smoothing; Information Emphasization; Noise},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   D.4.6 Operating Systems Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {While the Internet of Things (IoT) is a key driver for Smart Services that
      greatly facilitate our everyday life, it also poses a serious threat to
      privacy. Smart Services collect and analyze a vast amount of (partly private)
      data and thus gain valuable insights concerning their users. To prevent this,
      users have to balance service quality (i.e., reveal a lot of private data) and
      privacy (i.e., waive many features). Current IoT privacy approaches do not
      reflect this discrepancy properly and are often too restrictive as a
      consequence. For this reason, we introduce VAULT, a new approach for the
      protection of private data. VAULT is tailored to time series data as used by
      the IoT. It achieves a good tradeoff between service quality and privacy. For
      this purpose, VAULT applies five different privacy techniques. Our
      implementation of VAULT adopts a Privacy by Design approach.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-19&engl=1}
}

@inproceedings {INPROC-2019-18,
   author = {Dominik Lucke and Frank Steimle and Emir Cuk and Michael Luckert and Matthias Schneider and Daniel Schel},
   title = {{Implementation of the MIALinx User Interface for Future Manufacturing Environments}},
   booktitle = {Proceedings of the 52nd CIRP Conference on Manufacturing Systems (CMS), Ljubljana, Slovenia, June 12-14, 2019},
   publisher = {Elsevier},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Procedia CIRP},
   volume = {81},
   pages = {606--611},
   type = {Conference Paper},
   month = {June},
   year = {2019},
   doi = {10.1016/j.procir.2019.03.163},
   keywords = {Manufacturing; Smart Factory; Industrie 4.0; Manufacturing Service Bus; Rules; Integration; User Interface},
   language = {English},
   cr-category = {H.4.0 Information Systems Applications General,
                   I.2.1 Applications and Expert Systems},
   ee = {http://www.sciencedirect.com/science/article/pii/S2212827119304688},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The flexible and easy-to-use integration of production equipment and IT systems
      on the shop floor becomes more and more a success factor for manufacturing to
      adapt rapidly to changing situations. The approach of the Manufacturing
      Integration Assistant (MIALinx) is to simplify this challenge. The integration
      steps range from integrating sensors over collecting and rule-based processing
      of sensor information to the execution of required actions. This paper presents
      the implementation of MIALinx to retrofit legacy machines for Industry 4.0 in a
      manufacturing environment and focus on the concept and implementation of the
      easy-to-use user interface as a key element.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-18&engl=1}
}

@inproceedings {INPROC-2019-16,
   author = {Marco Spie{\ss} and Peter Reimann},
   title = {{Angepasstes Item Set Mining zur gezielten Steuerung von Bauteilen in der Serienfertigung von Fahrzeugen}},
   booktitle = {Tagungsband der 18. Konferenz Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2019)},
   publisher = {Gesellschaft f{\"u}r Informatik (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {119--128},
   type = {Conference Paper},
   month = {March},
   year = {2019},
   language = {German},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Qualit{\"a}tsprobleme im Bereich Fahrzeugbau k{\"o}nnen nicht nur zum Imageverlust des
      Unternehmens f{\"u}hren, sondern auch mit entsprechend hohen Kosten einhergehen.
      Wird ein Bauteil als Verursacher eines Qualit{\"a}tsproblems identifiziert, muss
      dessen Verbau gestoppt werden. Mit einer Datenanalyse kann herausgefunden
      werden, welche Fahrzeugkonfigurationen Probleme mit diesem fehlerverursachenden
      Bauteil haben. Im Rahmen der dom{\"a}nenspezifischen Problemstellung wird in diesem
      Beitrag die Anwendbarkeit von Standardalgorithmen aus dem Bereich Data-Mining
      untersucht. Da die Analyseergebnisse auf Standardausstattungen hinweisen, sind
      diese nicht zielf{\"u}hrend. F{\"u}r dieses Businessproblem von Fahrzeugherstellern
      haben wir einen Data-Mining Algorithmus entwickelt, der das Vorgehen des Item
      Set Mining der Assoziationsanalyse an das dom{\"a}nenspezifische Problem anpasst.
      Er unterscheidet sich zum klassischen Apriori-Algorithmus in der Beschneidung
      des Ergebnisraumes sowie in der nachfolgenden Aufbereitung und Verwendungsweise
      der Item Sets. Der Algorithmus ist allgemeing{\"u}ltig f{\"u}r alle Fahrzeughersteller
      anwendbar. Die Ergebnisse sind anhand eines realen Anwendungsfalls evaluiert
      worden, bei dem durch die Anwendung unseres Algorithmus 87\% der Feldausf{\"a}lle
      verhindert werden k{\"o}nnen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-16&engl=1}
}

@inproceedings {INPROC-2019-15,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Modeling Data Lakes with Data Vault: Practical Experiences, Assessment, and Lessons Learned}},
   booktitle = {Proceedings of the 38th Conference on Conceptual Modeling (ER 2019)},
   publisher = {Springer},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--14},
   type = {Conference Paper},
   month = {November},
   year = {2019},
   keywords = {Data Lakes; Data Vault; Data Modeling; Industry Experience; Assessment; Lessons Learned},
   language = {English},
   cr-category = {H.2.1 Database Management Logical Design},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Data lakes have become popular to enable organization-wide analytics on
      heterogeneous data from multiple sources. Data lakes store data in their raw
      format and are often characterized as schema-free. Nevertheless, it turned out
      that data still need to be modeled, as neglecting data modeling may lead to
      issues concerning e.g., quality and integration. In current research literature
      and industry practice, Data Vault is a popular modeling technique for
      structured data in data lakes. It promises a flexible, extensible data model
      that preserves data in their raw format. However, hardly any research or
      assessment exist on the practical usage of Data Vault for modeling data lakes.
      In this paper, we assess the Data Vault model's suitability for the data lake
      context, present lessons learned, and investigate success factors for the use
      of Data Vault. Our discussion is based on the practical usage of Data Vault in
      a large, global manufacturer's data lake and the insights gained in
      real-world analytics projects.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-15&engl=1}
}

@inproceedings {INPROC-2019-14,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz},
   title = {{Leveraging the Data Lake - Current State and Challenges}},
   booktitle = {Proceedings of the 21st International Conference on Big Data Analytics and Knowledge Discovery (DaWaK'19)},
   publisher = {Springer Nature},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--10},
   type = {Conference Paper},
   month = {August},
   year = {2019},
   keywords = {Data Lakes; State of the Art; Challenges},
   language = {English},
   cr-category = {H.2.4 Database Management Systems,
                   H.2.8 Database Applications},
   contact = {Senden Sie eine E-Mail an Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The digital transformation leads to massive amounts of heterogeneous data
      challenging traditional data warehouse solutions in enterprises. In order to
      exploit these complex data for competitive advantages, the data lake recently
      emerged as a concept for more flexible and powerful data analytics. However,
      existing literature on data lakes is rather vague and incomplete, and the
      various realization approaches that have been proposed neither cover all
      aspects of data lakes nor do they provide a comprehensive design and
      realization strategy. Hence, enterprises face multiple challenges when building
      data lakes. To address these shortcomings, we investigate existing data lake
      literature and discuss various design and realization aspects for data lakes,
      such as governance or data models. Based on these insights, we identify
      challenges and research gaps concerning (1) data lake architecture, (2) data
      lake governance, and (3) a comprehensive strategy to realize data lakes. These
      challenges still need to be addressed to successfully leverage the data lake in
      practice.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-14&engl=1}
}

@inproceedings {INPROC-2019-12,
   author = {Christoph Stach},
   title = {{Konzepte zum Schutz privater Muster in Zeitreihendaten: IoT-Anwendungen im Spannungsfeld zwischen Servicequalit{\"a}t und Datenschutz}},
   booktitle = {Informatik 2019: 50 Jahre Gesellschaft f{\"u}r Informatik -- Informatik f{\"u}r Gesellschaft, Tagungsband der 49. Jahrestagung der Gesellschaft f{\"u}r Informatik e.V. (GI), 23.09. - 26.09.2019, Kassel},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {1--14},
   type = {Conference Paper},
   month = {September},
   year = {2019},
   keywords = {Datenschutz; Zeitreihendaten; IoT; DSGVO; ePrivacy-Verordnung; TICK-Stack},
   language = {German},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   G.1.10 Numerical Analysis Applications},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Obwohl das Internet der Dinge (IoT) die Voraussetzung f{\"u}r smarte Anwendungen
      schafft, die signifikante Vorteile gegen{\"u}ber traditionellen Anwendungen
      bieten, stellt die zunehmende Verbreitung von IoT-f{\"a}higen Ger{\"a}ten auch eine
      immense Gef{\"a}hrdung der Privatheit dar. IoT-Anwendungen sammeln eine Vielzahl
      an Daten und senden diese zur Verarbeitung an ein leistungsstarkes Back-End.
      Hierbei werden umfangreiche Erkenntnisse {\"u}ber den Nutzer gewonnen. Erst dieses
      Wissen erm{\"o}glicht die Servicevielfalt die IoT-Anwendungen bieten. Der Nutzer
      muss daher einen Kompromiss aus Servicequalit{\"a}t und Datenschutz treffen.
      Heutige Datenschutzans{\"a}tze ber{\"u}cksichtigen dies unzureichend und sind dadurch
      h{\"a}ufig zu restriktiv. Aus diesem Grund stellen wir neue Konzepte zum Schutz
      privater Daten f{\"u}r das IoT vor. Diese ber{\"u}cksichtigen die speziellen
      Eigenschaften der im IoT zum Einsatz kommenden Zeitreihendaten. So kann die
      Privatheit des Nutzers gew{\"a}hrleistet werden, ohne die Servicequalit{\"a}t
      unn{\"o}tig einzuschr{\"a}nken. Basierend auf den TICK-Stack beschreiben wir
      Implementierungsans{\"a}tze f{\"u}r unsere Konzepte, die einem
      Privacy-by-Design-Ansatz folgen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-12&engl=1}
}

@inproceedings {INPROC-2019-10,
   author = {Christian Weber and Pascal Hirmer and Peter Reimann and Holger Schwarz},
   title = {{A New Process Model for the Comprehensive Management of Machine Learning Models}},
   booktitle = {Proceedings of the 21st International Conference on Enterprise Information Systems (ICEIS); Heraklion, Crete, Greece, May 3-5, 2019},
   editor = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
   publisher = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {415--422},
   type = {Conference Paper},
   month = {May},
   year = {2019},
   isbn = {978-989-758-372-8},
   doi = {10.5220/0007725304150422},
   keywords = {Model Management; Machine Learning; Analytics Process},
   language = {English},
   cr-category = {I.2 Artificial Intelligence},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The management of machine learning models is an extremely challenging task.
      Hundreds of prototypical models are being built and just a few are mature
      enough to be deployed into operational enterprise information systems. The
      lifecycle of a model includes an experimental phase in which a model is
      planned, built and tested. After that, the model enters the operational phase
      that includes deploying, using, and retiring it. The experimental phase is well
      known through established process models like CRISP-DM or KDD. However, these
      models do not detail on the interaction between the experimental and the
      operational phase of machine learning models. In this paper, we provide a new
      process model to show the interaction points of the experimental and
      operational phase of a machine learning model. For each step of our process, we
      discuss according functions which are relevant to managing machine learning
      models.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-10&engl=1}
}

@inproceedings {INPROC-2019-09,
   author = {Christoph Stach and Bernhard Mitschang},
   title = {{ECHOES: A Fail-safe, Conflict Handling, and Scalable Data Management Mechanism for the Internet of Things}},
   booktitle = {Proceedings of the 23rd European Conference on Advances in Databases and Information Systems: ADBIS '19; Bled, Slovenia, September 8-11, 2019},
   editor = {Tatjana Welzer and Johann Eder and Vili Podgorelec and Aida Kami{\v s}ali{\'c} Latifi{\'c}},
   publisher = {Springer},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Computer Science (LNCS)},
   pages = {1--16},
   type = {Conference Paper},
   month = {September},
   year = {2019},
   keywords = {Internet of Things; Data Exchange; Synchronization Protocol},
   language = {English},
   cr-category = {H.2.7 Database Administration,
                   H.2.4 Database Management Systems},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The Internet of Things (IoT) and Smart Services are becoming increasingly
      popular. Such services adapt to a user's needs by using sensors to detect the
      current situation. Yet, an IoT service has to capture its required data by
      itself, even if another service has already captured it before. There is no
      data exchange mechanism adapted to the IoT which enables sharing of sensor data
      among services and across devices.
      
      Therefore, we introduce a data management mechanism for the IoT. Due to its
      applied state-based synchronization protocol called ECHOES. It is fail-safe in
      case of connection failures, it detects and handles data conflicts, it is
      geared towards devices with limited resources, and it is highly scalable. We
      embed ECHOES into a data provisioning infrastructure, namely the Privacy
      Management Platform and the Secure Data Container. Evaluation results verify
      the practicability of our approach.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-09&engl=1}
}

@inproceedings {INPROC-2019-08,
   author = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
   title = {{A Hybrid Information Extraction Approach Exploiting Structured Data Within a Text Mining Process}},
   booktitle = {18. Fachtagung des GI-Fachbereichs ,,Datenbanken und Informationssysteme (DBIS), 4.-8. M{\"a}rz 2019, Rostock, Germany, Proceedings.},
   editor = {Torsten Grust and Felix Naumann and Alexander B{\"o}hm and Wolfgang Lehner and Theo H{\"a}rder and Erhard et al. Rahm},
   address = {Bonn},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {149--168},
   type = {Conference Paper},
   month = {March},
   year = {2019},
   keywords = {information extraction; clustering; text mining; free text fields},
   language = {English},
   cr-category = {I.2.7 Natural Language Processing},
   ee = {https://doi.org/10.18420/btw2019-10},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Many data sets encompass structured data fields with embedded free text fields.
      The text fields allow customers and workers to input information which cannot
      be encoded in structured fields. Several approaches use structured and
      unstructured data in isolated analyses. The result of isolated mining of
      structured data fields misses crucial information encoded in free text. The
      result of isolated text mining often mainly repeats information already
      available from structured data. The actual information gain of isolated text
      mining is thus limited. The main drawback of both isolated approaches is that
      they may miss crucial information. The hybrid information extraction approach
      suggested in this paper addresses this issue. Instead of extracting information
      that in large parts was already available beforehand, it extracts new, valuable
      information from free texts. Our solution exploits results of analyzing
      structured data within the text mining process, i.e., structured information
      guides and improves the information extraction process on textual data. Our
      main contributions comprise the description of the concept of hybrid
      information extraction as well as a prototypical implementation and an
      evaluation with two real-world data sets from aftersales and production with
      English and German free text fields.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-08&engl=1}
}

@inproceedings {INPROC-2019-07,
   author = {Cornelia Kiefer},
   title = {{Quality Indicators for Text Data}},
   booktitle = {18. Fachtagung des GI-Fachbereichs ,,Datenbanken und Informationssysteme (DBIS), 4.-8. M{\"a}rz 2019, Rostock, Germany, Workshopband.},
   editor = {Holger Meyer and Norbert Ritter and Andreas Thor and Daniela Nicklas and Andreas Heuer and Meike Klettke},
   address = {Bonn},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Dagstuhl Reports},
   pages = {145--154},
   type = {Conference Paper},
   month = {March},
   year = {2019},
   keywords = {data quality; text data quality; text mining; text analysis; quality indicators for text data},
   language = {English},
   cr-category = {I.2.7 Natural Language Processing},
   ee = {https://doi.org/10.18420/btw2019-ws-15},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Textual data sets vary in terms of quality. They have different characteristics
      such as the average sentence length or the amount of spelling mistakes and
      abbreviations. These text characteristics have influence on the quality of text
      mining results. They may be measured automatically by means of quality
      indicators. We present indicators, which we implemented based on natural
      language processing libraries such as Stanford CoreNLP and NLTK. We discuss
      design decisions in the implementation of exemplary indicators and provide all
      indicators on GitHub. In the evaluation, we investigate freetexts from
      production, news, prose, tweets and chat data and show that the suggested
      indicators predict the quality of two text mining modules.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-07&engl=1}
}

@inproceedings {INPROC-2019-06,
   author = {Christoph Stach and Frank Steimle and Clementine Gritti and Bernhard Mitschang},
   title = {{PSSST! The Privacy System for Smart Service Platforms: An Enabler for Confidable Smart Environments}},
   booktitle = {Proceedings of the 4th International Conference on Internet of Things, Big Data and Security (IoTBDS '19)},
   publisher = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--12},
   type = {Conference Paper},
   month = {May},
   year = {2019},
   keywords = {Privacy; Access Control; Internet of Things; Smart Service Platform; Sensors; Actuators; Stream Processing},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   D.4.6 Operating Systems Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The Internet of Things and its applications are becoming increasingly popular.
      Especially Smart Service Platforms like Alexa are in high demand. Such a
      platform retrieves data from sensors, processes them in a back-end, and
      controls actuators in accordance with the results. Thereby, all aspects of our
      everyday life can be managed. In this paper, we reveal the downsides of this
      technology by identifying its privacy threats based on a real-world
      application. Our studies show that current privacy systems do not tackle these
      issues adequately. Therefore, we introduce PSSST!, a user-friendly and
      comprehensive privacy system for Smart Service Platforms limiting the amount of
      disclosed private information while maximizing the quality of service at the
      same time.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-06&engl=1}
}

@inproceedings {INPROC-2019-03,
   author = {Christoph Stach and Corinna Giebler and Simone Schmidt},
   title = {{Zuverl{\"a}ssige Versp{\"a}tungsvorhersagen mithilfe von TAROT}},
   booktitle = {Tagungsband der 18. GI-Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {1--4},
   type = {Demonstration},
   month = {March},
   year = {2019},
   keywords = {Versp{\"a}tungsvorhersage; {\"O}PNV; deskriptive Analyse; pr{\"a}diktive Analyse; Concept Drift},
   language = {German},
   cr-category = {H.2.8 Database Applications,
                   H.2.4 Database Management Systems},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Bei der Einhaltung von Schadstoffwerten nehmen {\"o}ffentliche Verkehrsmittel eine
      immer entscheidendere Rolle ein. Daher wird vermehrt darauf geachtet, deren
      Attraktivit{\"a}t zu erh{\"o}hen. Ein wichtiger Punkt hierbei ist die
      Vorhersagegenauigkeit von Versp{\"a}tungen zu verbessern, damit Fahrg{\"a}ste
      entsprechend planen k{\"o}nnen. Die aktuell angewandten Ans{\"a}tze sind h{\"a}ufig
      ungenau, da sie die zur Verf{\"u}gung stehenden Daten nicht ausreichend nutzen. In
      diesem Beitrag stellen wir daher mit TAROT ein System vor, das mittels
      pr{\"a}diktiver Analysen die Vorhersagegenauigkeit von Versp{\"a}tungen verbessert,
      indem es in den Modellen Versp{\"a}tungsfortpflanzungen ber{\"u}cksichtigt. Dar{\"u}ber
      hinaus ist es in der Lage, im Fall einer St{\"o}rung augenblicklich auf ein
      besseres Vorhersagemodell umzusteigen und auf sowohl schleichende als auch
      abrupte Ver{\"a}nderungen automatisch zu reagieren. Die Vorteile dieser
      Eigenschaften lassen sich in unserem TAROT-Demonstrator anhand von vier
      repr{\"a}sentativen Anwendungsszenarien zeigen. Auch wenn sich die gezeigten
      Szenarien alle auf die Versp{\"a}tungsvorhersage von S-Bahnen beziehen, lassen
      sich die Konzepte von TAROT auch auf viele andere Anwendungsbereiche (z.B. zur
      Bestimmung von Produktionszeiten in der Industrie 4.0) anwenden.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-03&engl=1}
}

@inproceedings {INPROC-2019-02,
   author = {Christoph Stach and Frank Steimle},
   title = {{Recommender-based Privacy Requirements Elicitation - EPICUREAN: An Approach to Simplify Privacy Settings in IoT Applications with Respect to the GDPR}},
   booktitle = {Proceedings of the 34th ACM/SIGAPP Symposium On Applied Computing (SAC)},
   publisher = {ACM Press},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--8},
   type = {Conference Paper},
   month = {April},
   year = {2019},
   keywords = {privacy requirements elicitation; recommender system; knowledge modeling; clustering; association rules; privacy system; IoT; eHealth},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                    J.4 Social and Behavioral Sciences,
                    H.3.3 Information Search and Retrieval},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Due to the Internet of Things (IoT), a giant leap towards a quantified self is
      made, i.e., more and more aspects of our lives are being captured, processed,
      and analyzed. This has many positive implications, e.g., Smart Health services
      help to relieve patients as well as physicians and reduce treatment costs.
      However, the price for such services is the disclosure of a lot of private
      data. For this reason, Smart Health services were particularly considered by
      the European General Data Protection Regulation (GDPR): a data subject's
      explicit consent is required when such a service processes his or her data.
      However, the elicitation of privacy requirements is a shortcoming in most IoT
      privacy systems. Either the user is overwhelmed by too many options or s/he is
      not sufficiently involved in the decision process. For this reason, we
      introduce EPICUREAN, a recommender-based privacy requirements elicitation
      approach. EPICUREAN uses modeling and data mining techniques to determine and
      recommend appropriate privacy settings to the user. The user is thus
      considerably supported but remains in full control over his or her private
      data.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2019-02&amp;engl=1}
}

@article {ART-2019-24,
   author = {Daniel Del Gaudio and Pascal Hirmer},
   title = {{A lightweight messaging engine for decentralized data processing in the Internet of Things}},
   journal = {SICS Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {39--48},
   type = {Article in Journal},
   month = {August},
   year = {2019},
   doi = {10.1007/s00450-019-00410-z},
   language = {English},
   cr-category = {C.2.4 Distributed Systems},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Today, Internet of Things applications are available throughout many different
      domains (manufacturing, health, cities, homes), enabling a high degree of
      automation to ease people's lives. For example, automated heating systems in
      a smart home can lead to reduced costs and an increased comfort for the
      residents. In the IoT, situations can be detected through interpretation of
      data produced by heterogeneous sensors, which typically lead to an invocation
      of actuators. In such applications, sensor data is usually streamed to a
      central instance for processing. However, especially in time-critical
      applications, this is not feasible, since high latency is an issue. To cope
      with this problem, in this paper, we introduce an approach for decentralized
      data processing in the IoT. This leads to decreased latency as well as a
      reduction of costs.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-24&amp;engl=1}
}

@article {ART-2019-22,
  author      = {Dimitri Petrik and Mathias Mormul and Peter Reimann},
  title       = {{Anforderungen f{\"u}r Zeitreihendatenbanken in der industriellen Edge}},
  journal     = {HMD Praxis der Wirtschaftsinformatik},
  publisher   = {Springer-Verlag},
  volume      = {56},
  pages       = {1282--1308},
  type        = {Article in Journal},
  month       = {October},
  year        = {2019},
  doi         = {10.1365/s40702-019-00568-9},
  keywords    = {Time Series Data; Time Series Database; Industrial IoT; Edge Computing; Defining Requirements; InfluxDB},
  language    = {German},
  cr-category = {E.0 Data General},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {Das industrielle Internet der Dinge (iIoT) integriert Informations- und
     Kommunikationstechnologien in die industriellen Prozesse und erweitert sie
     durch Echtzeit-Datenanalyse. Eine bedeutende Menge an Daten, die in der
     industriellen Fertigung generiert werden, sind sensorbasierte Zeitreihendaten,
     die in regelm{\"a}{\ss}igen Abst{\"a}nden generiert werden und zus{\"a}tzlich zum
     Sensorwert einen Zeitstempel enthalten. Spezielle Zeitreihen-Datenbanken (TSDB)
     sind daf{\"u}r ausgelegt, die Zeitreihendaten effizienter zu speichern. Wenn TSDBs
     in der N{\"a}he der Maschine (in der industriellen Edge) eingesetzt werden, sind
     Maschinendaten zur {\"U}berwachung zeitkritischer Prozesse aufgrund der niedrigen
     Latenz schnell verf{\"u}gbar, was die erforderliche Zeit f{\"u}r die
     Datenverarbeitung reduziert. Bisherige Untersuchungen zu TSDBs sind bei der
     Auswahl f{\"u}r den Einsatz in der industriellen Edge nur begrenzt hilfreich. Die
     meisten verf{\"u}gbaren Benchmarks von TSDBs sind performanceorientiert und
     ber{\"u}cksichtigen nicht die Einschr{\"a}nkungen der industriellen Edge. Wir
     adressieren diese L{\"u}cke und identifizieren die funktionalen Kriterien f{\"u}r den
     Einsatz von TSDBs im maschinennahen Umfeld und bilden somit einen qualitativen
     Anforderungskatalog. Des Weiteren zeigen wir am Beispiel von InfluxDB, wie
     dieser Katalog verwendet werden kann, mit dem Ziel die Auswahl einer geeigneten
     TSDB f{\"u}r Sensordaten in der Edge zu unterst{\"u}tzen.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-22&amp;engl=1}
}

@article {ART-2019-21,
   author = {Christoph Stach},
   title = {{Datenschutzkonzepte f{\"u}r Zeitreihendaten: Bewertung von qualit{\"a}tsbewahrenden Transformationsoperatoren zum Schutz privater Datenmuster in IoT-Anwendungen}},
   journal = {Datenschutz und Datensicherheit - DuD},
   address = {Wiesbaden},
   publisher = {Springer Fachmedien},
   volume = {43},
   number = {12},
   pages = {753--759},
   type = {Article in Journal},
   month = {December},
   year = {2019},
   issn = {1614-0702},
   doi = {10.1007/s11623-019-1201-8},
   keywords = {Datenschutz; Zeitreihendaten; IoT; DSGVO},
   language = {German},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                    G.1.10 Numerical Analysis Applications},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Obwohl das Internet der Dinge (IoT) die Voraussetzung f{\"u}r smarte Anwendungen
      schafft, die signifikante Vorteile gegen{\"u}ber traditionellen Anwendungen bieten,
      stellt die zunehmende Verbreitung von IoT-f{\"a}higen Ger{\"a}ten auch eine immense
      Gef{\"a}hrdung der Privatheit dar. IoT-Anwendungen sammeln eine Vielzahl an Daten
      und senden diese zur Verarbeitung an ein Back-End. Hierbei werden umfangreiche
      Erkenntnisse {\"u}ber den Nutzer gewonnen. Erst dieses Wissen erm{\"o}glicht die
      Servicevielfalt, die IoT-Anwendungen bieten. Der Nutzer muss daher einen
      Kompromiss aus Servicequalit{\"a}t und Datenschutz treffen. Heutige
      Datenschutzans{\"a}tze ber{\"u}cksichtigen dies unzureichend und sind dadurch h{\"a}ufig
      zu restriktiv. Daher stellen wir neue Konzepte zum Schutz privater Daten f{\"u}r
      das IoT vor. Diese ber{\"u}cksichtigen die speziellen Eigenschaften von
      IoT-Zeitreihendaten. So kann die Privatheit des Nutzers gew{\"a}hrleistet werden,
      ohne die Servicequalit{\"a}t unn{\"o}tig einzuschr{\"a}nken.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-21&amp;engl=1}
}

@article {ART-2019-12,
  author      = {Mathias Mormul and Pascal Hirmer and Matthias Wieland and Bernhard Mitschang},
  title       = {{Distributed Situation Recognition in Industry 4.0}},
  journal     = {International Journal On Advances in Intelligent Systems},
  publisher   = {IARIA},
  volume      = {12},
  number      = {1},
  pages       = {39--49},
  type        = {Article in Journal},
  month       = {August},
  year        = {2019},
  issn        = {1942-2679},
  keywords    = {Industry 4.0; Edge Computing; Situation Recognition; Distribution Pattern},
  language    = {English},
  cr-category = {E.0 Data General},
  ee          = {https://www.iariajournals.org/intelligent_systems/intsys_v12_n12_2019_paged.pdf},
  contact     = {mathias.mormul@ipvs.uni-stuttgart.de},
  department  = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
  abstract    = {In recent years, advances in the Internet of Things led to new approaches and
     applications, for example, in the domains Smart Factories or Smart Cities.
     However, with the advantages such applications bring, also new challenges
     arise. One of these challenges is the recognition of situations, e.g., machine
     failures in Smart Factories. Especially in the domain of industrial
     manufacturing, several requirements have to be met in order to deliver a
     reliable and efficient situation recognition. One of these requirements is
     distribution in order to achieve high efficiency. In this article, we present a
     layered modeling approach to enable distributed situation recognition. These
     layers include the modeling, the deployment, and the execution of the situation
     recognition. Furthermore, we enable tool support to decrease the complexity for
     domain users.},
  url         = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-12&amp;engl=1}
}

@article {ART-2019-11,
   author = {Manuel Fritz and Osama Muazzen and Michael Behringer and Holger Schwarz},
   title = {{ASAP-DM: a framework for automatic selection of analytic platforms for data mining}},
   journal = {Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--13},
   type = {Article in Journal},
   month = {August},
   year = {2019},
   issn = {2524-8510},
   isbn = {2524-8529},
   internal-note = {NOTE(review): the isbn value 2524-8529 looks like the journal's electronic ISSN, not an ISBN -- verify and move to an eissn field if confirmed},
   doi = {10.1007/s00450-019-00408-7},
   keywords = {Data mining; Analytic platform; Platform selection},
   language = {English},
   cr-category = {E.0 Data General,
                    H.2.8 Database Applications,
                    H.3.3 Information Search and Retrieval},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The plethora of analytic platforms escalates the difficulty of selecting the
      most appropriate analytic platform that fits the needed data mining task, the
      dataset as well as additional user-defined criteria. Especially analysts, who
      are rather focused on the analytics domain, experience difficulties to keep up
      with the latest developments. In this work, we introduce the ASAP-DM framework,
      which enables analysts to seamlessly use several platforms, whereas programmers
      can easily add several platforms to the framework. Furthermore, we investigate
      how to predict a platform based on specific criteria, such as lowest runtime or
      resource consumption during the execution of a data mining task. We formulate
      this task as an optimization problem, which can be solved by today's
      classification algorithms. We evaluate the proposed framework on several
      analytic platforms such as Spark, Mahout, and WEKA along with several data
      mining algorithms for classification, clustering, and association rule
      discovery. Our experiments unveil that the automatic selection process can save
      up to 99.71\% of the execution time due to automatically choosing a faster
      platform.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-11&amp;engl=1}
}

@article {ART-2019-10,
   author = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
   title = {{QUALM: Ganzheitliche Messung und Verbesserung der Datenqualit{\"a}t in der Textanalyse}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer Verlag},
   pages = {1--12},
   type = {Article in Journal},
   month = {June},
   year = {2019},
   doi = {10.1007/s13222-019-00318-7},
   keywords = {Datenqualit{\"a}t; Textanalyse; Text Mining; Trainingsdaten; Semantische Ressourcen},
   language = {German},
   cr-category = {H.3 Information Storage and Retrieval},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Bestehende Ans{\"a}tze zur Messung und Verbesserung der Qualit{\"a}t von Textdaten in
      der Textanalyse bringen drei gro{\ss}e Nachteile mit sich. Evaluationsmetriken wie
      zum Beispiel Accuracy messen die Qualit{\"a}t zwar verl{\"a}sslich, sie (1) sind jedoch
      auf aufw{\"a}ndig h{\"a}ndisch zu erstellende Goldannotationen angewiesen und (2) geben
      keine Ansatzpunkte f{\"u}r die Verbesserung der Qualit{\"a}t. Erste dom{\"a}nenspezifische
      Datenqualit{\"a}tsmethoden f{\"u}r unstrukturierte Textdaten kommen zwar ohne
      Goldannotationen aus und geben Ansatzpunkte zur Verbesserung der Datenqualit{\"a}t.
      Diese Methoden wurden jedoch nur f{\"u}r begrenzte Anwendungsgebiete entwickelt und
      (3) ber{\"u}cksichtigen deshalb nicht die Spezifika vieler Analysetools in
      Textanalyseprozessen. In dieser Arbeit pr{\"a}sentieren wir hierzu das
      QUALM-Konzept zum qualitativ hochwertigen Mining von Textdaten (QUALity
      Mining), das die drei o.g. Nachteile adressiert. Das Ziel von QUALM ist es, die
      Qualit{\"a}t der Analyseergebnisse, z. B. bzgl. der Accuracy einer
      Textklassifikation, auf Basis einer Messung und Verbesserung der Datenqualit{\"a}t
      zu erh{\"o}hen. QUALM bietet hierzu eine Menge an QUALM-Datenqualit{\"a}tsmethoden.
      QUALM-Indikatoren erfassen die Datenqualit{\"a}t ganzheitlich auf Basis der Passung
      zwischen den Eingabedaten und den Spezifika der Analysetools, wie den
      verwendeten Features, Trainingsdaten und semantischen Ressourcen (wie zum
      Beispiel W{\"o}rterb{\"u}chern oder Taxonomien). Zu jedem Indikator geh{\"o}rt ein
      passender Modifikator, mit dem sowohl die Daten als auch die Spezifika der
      Analysetools ver{\"a}ndert werden k{\"o}nnen, um die Datenqualit{\"a}t zu erh{\"o}hen. In einer
      ersten Evaluation von QUALM zeigen wir f{\"u}r konkrete Analysetools und
      Datens{\"a}tze, dass die Anwendung der QUALM-Datenqualit{\"a}tsmethoden auch mit einer
      Erh{\"o}hung der Qualit{\"a}t der Analyseergebnisse im Sinne der Evaluationsmetrik
      Accuracy einhergeht. Die Passung zwischen Eingabedaten und Spezifika der
      Analysetools wird hierzu mit konkreten QUALM-Modifikatoren erh{\"o}ht, die zum
      Beispiel Abk{\"u}rzungen aufl{\"o}sen oder automatisch auf Basis von
      Text{\"a}hnlichkeitsmetriken passende Trainingsdaten vorschlagen.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-10&amp;engl=1}
}

@article {ART-2019-09,
   author = {Karoline Saatkamp and Uwe Breitenb{\"u}cher and Oliver Kopp and Frank Leymann},
   title = {{Method, formalization, and algorithms to split topology models for distributed cloud application deployments}},
   journal = {Computing},
   publisher = {Springer Wien},
   pages = {1--21},
   type = {Article in Journal},
   month = {April},
   year = {2019},
   doi = {10.1007/s00607-019-00721-8},
   keywords = {Application deployment; Distribution; Splitting; Multi-cloud; TOSCA},
   language = {English},
   cr-category = {D.2.2 Software Engineering Design Tools and Techniques},
   ee = {https://link.springer.com/article/10.1007/s00607-019-00721-8},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;
                  University of Stuttgart, Institute of Architecture of Application Systems},
   abstract = {For automating the deployment of applications in cloud environments, a variety
      of technologies have been developed in recent years. These technologies enable
      to specify the desired deployment in the form of deployment models that can be
      automatically processed by a provisioning engine. However, the deployment
      across several clouds increases the complexity of the provisioning. Using one
      deployment model with a single provisioning engine, which orchestrates the
      deployment across the clouds, forces the providers to expose low-level APIs to
      ensure the accessibility from outside. In this paper, we present an extended
      version of the split and match method to facilitate the division of deployment
      models to multiple models which can be deployed by each provider separately.
      The goal of this approach is to reduce the information and APIs which have to
      be exposed to the outside. We present a formalization and algorithms to
      automate the method. Moreover, we validate the practical feasibility by a
      prototype based on the TOSCA standard and the OpenTOSCA ecosystem.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-09&amp;engl=1}
}

@article {ART-2019-07,
   author = {Manuel Fritz and Michael Behringer and Holger Schwarz},
   title = {{Quality-driven early stopping for explorative cluster analysis for big data}},
   journal = {Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--12},
   type = {Article in Journal},
   month = {February},
   year = {2019},
   issn = {2524-8510},
   isbn = {2524-8529},
   internal-note = {NOTE(review): the isbn value 2524-8529 looks like the journal's electronic ISSN, not an ISBN -- verify and move to an eissn field if confirmed},
   doi = {10.1007/s00450-019-00401-0},
   keywords = {Clustering; Big Data; Early Stop; Convergence; Regression},
   language = {English},
   cr-category = {E.0 Data General,
                    H.2.8 Database Applications,
                    H.3.3 Information Search and Retrieval},
   ee = {https://link.springer.com/article/10.1007/s00450-019-00401-0},
   contact = {manuel.fritz@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Data analysis has become a critical success factor for companies in all areas.
      Hence, it is necessary to quickly gain knowledge from available datasets, which
      is becoming especially challenging in times of big data. Typical data mining
      tasks like cluster analysis are very time consuming even if they run in highly
      parallel environments like Spark clusters. To support data scientists in
      explorative data analysis processes, we need techniques to make data mining
      tasks even more efficient. To this end, we introduce a novel approach to stop
      clustering algorithms as early as possible while still achieving an adequate
      quality of the detected clusters. Our approach exploits the iterative nature of
      many cluster algorithms and uses a metric to decide after which iteration the
      mining task should stop. We present experimental results based on a Spark
      cluster using multiple huge datasets. The experiments unveil that our approach
      is able to accelerate the clustering up to a factor of more than 800 by
      obliterating many iterations which provide only little gain in quality. This
      way, we are able to find a good balance between the time required for data
      analysis and quality of the analysis results.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-07&amp;engl=1}
}

@article {ART-2019-03,
   author = {Karoline Saatkamp and Uwe Breitenb{\"u}cher and Oliver Kopp and Frank Leymann},
   title = {{An approach to automatically detect problems in restructured deployment models based on formalizing architecture and design patterns}},
   journal = {SICS Software-Intensive Cyber-Physical Systems},
   publisher = {Springer Berlin Heidelberg},
   pages = {1--13},
   type = {Article in Journal},
   month = {February},
   year = {2019},
   doi = {10.1007/s00450-019-00397-7},
   keywords = {Topology-based deployment model; Patterns; Problem detection; TOSCA; Logic programming; Prolog},
   language = {English},
   cr-category = {C.2.4 Distributed Systems,
                    D.2.2 Software Engineering Design Tools and Techniques,
                    D.2.12 Software Engineering Interoperability,
                    K.6 Management of Computing and Information Systems},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;
                  University of Stuttgart, Institute of Architecture of Application Systems},
   abstract = {For the automated deployment of applications, technologies exist which can
      process topology-based deployment models that describes the application's
      structure with its components and their relations. The topology-based
      deployment model of an application can be adapted for the deployment in
      different environments. However, the structural changes can lead to problems,
      which had not existed before and prevent a functional deployment. This includes
      security issues, communication restrictions, or incompatibilities. For example,
      a formerly over the internal network established insecure connection leads to
      security problems when using the public network after the adaptation. In order
      to solve problems in adapted deployment models, first the problems have to be
      detected. Unfortunately, detecting such problems is a highly non-trivial
      challenge that requires deep expertise about the involved technologies and the
      environment. In this paper, we present (i) an approach for detecting problems
      in deployment models using architecture and design patterns and (ii) the
      automation of the detection process by formalizing the problem a pattern solves
      in a certain context. We validate the practical feasibility of our approach by
      a prototypical implementation for the automated problem detection in TOSCA
      topologies.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2019-03&amp;engl=1}
}

@inbook {INBOOK-2019-03,
   author = {Christoph Stach and Frank Steimle and Bernhard Mitschang},
   title = {{How to Realize Device Interoperability and Information Security in mHealth Applications}},
   booktitle = {Biomedical Engineering Systems and Technologies},
   address = {Cham},
   publisher = {Springer Nature},
   series = {Communications in Computer and Information Science},
   volume = {1024},
   pages = {213--237},
   type = {Article in Book},
   month = {August},
   year = {2019},
   isbn = {978-3-030-29195-2},
   doi = {10.1007/978-3-030-29196-9_12},
   keywords = {mHealth; Device interoperability; Information security; COPD},
   language = {English},
   cr-category = {H.5.0 Information Interfaces and Presentation General,
                    K.6.5 Security and Protection,
                    K.8 Personal Computing},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {More and more people suffer from chronic diseases such as the chronic
      obstructive pulmonary disease (COPD). This leads to very high treatment costs
      every year, as such patients require a periodic screening of their condition.
      However, many of these checks can be performed at home by the patients
      themselves. This enables physicians to focus on actual emergencies. Modern
      smart devices such as Smartphones contribute to the success of these
      telemedical approaches. So-called mHealth apps combine the usability and
      versatility of Smartphones with the high accuracy and reliability of medical
      devices for home use. However, patients often face the problem of how to
      connect medical devices to their Smartphones (the device interoperability
      problem). Moreover, many patients reject mHealth apps due to the lack of
      control over their sensitive health data (the information security problem).
      
      In our work, we discuss the usage of the Privacy Management Platform (PMP) to
      solve these problems. So, we describe the structure of mHealth apps and present
      a real-world COPD application. From this application we derive relevant
      functions of an mHealth app, in which device interoperability or information
      security is an issue. We extend the PMP in order to provide support for these
      recurring functions. Finally, we evaluate the utility of these PMP extensions
      based on the real-world mHealth app.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2019-03&amp;engl=1}
}

@inbook {INBOOK-2019-02,
   author = {Christoph Stach and Bernhard Mitschang},
   title = {{Elicitation of Privacy Requirements for the Internet of Things Using ACCESSORS}},
   booktitle = {Information Systems Security and Privacy},
   address = {Cham},
   publisher = {Springer Nature},
   series = {Communications in Computer and Information Science},
   volume = {977},
   pages = {40--65},
   type = {Article in Book},
   month = {July},
   year = {2019},
   isbn = {978-3-030-25108-6},
   doi = {10.1007/978-3-030-25109-3_3},
   keywords = {Permission model; Data-centric; Derivation transparent; Fine-grained; Context-sensitive; Internet of Things; PMP; PATRON},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                    D.4.6 Operating Systems Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Novel smart devices are equipped with various sensors to capture context data.
      The Internet of Things (IoT) connects these devices with each other in order to
      bring together data from various domains. Due to the IoT, new application areas
      come up continuously. For instance, the quality of life and living can be
      significantly improved by installing connected and remote-controlled devices in
      Smart Homes. Or the treatment of chronic diseases can be made more convenient
      for both, patients and physicians, by using Smart Health technologies.
      
      For this, however, a large amount of data has to be collected, shared, and
      combined. This gathered data provides detailed insights into the user of the
      devices. Therefore, privacy is a key issue for such IoT applications. As
      current privacy systems for mobile devices focus on a single device only, they
      cannot be applied to a distributed and highly interconnected environment as the
      IoT. Therefore, we determine the special requirements towards a permission
      models for the IoT. Based on this requirements specification, we introduce
      ACCESSORS, a data-centric permission model for the IoT and describe how to
      apply such a model to two promising privacy systems for the IoT, namely the
      Privacy Management Platform (PMP) and PATRON.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2019-02&amp;engl=1}
}

@inbook {INBOOK-2019-01,
   author = {Christoph Stach},
   title = {{Fine-Grained Privacy Control for Fitness and Health Applications Using the Privacy Management Platform}},
   booktitle = {Information Systems Security and Privacy},
   address = {Cham},
   publisher = {Springer Nature},
   series = {Communications in Computer and Information Science},
   volume = {977},
   pages = {1--25},
   type = {Article in Book},
   month = {July},
   year = {2019},
   isbn = {978-3-030-25108-6},
   doi = {10.1007/978-3-030-25109-3_1},
   keywords = {Smartbands; Health and Fitness Applications; Privacy Concerns; Bluetooth; Internet; Privacy Policy Model; Privacy Management Platform},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                    D.4.6 Operating Systems Security and Protection,
                    K.8 Personal Computing},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Due to the Internet of Things, novel types of sensors are integrated into
      everyday objects. A domain that benefits most is the fitness and health domain.
      With the advent of the so-called Smartbands - i.e., bracelets or watches with
      built-in sensors such as heart rate sensors, location sensors, or even glucose
      meters - novel fitness and health application are made possible. That way a
      quantified self can be created. Despite all the advantages that such
      applications entail, new privacy concerns arise.
      
      These applications collect and process sensitive health data. Users are
      concerned by reports about privacy violations. These violations are enabled by
      inherent security vulnerabilities and deficiencies in the privacy systems of
      mobile platforms. As none of the existing privacy approaches is designed for
      the novel challenges arising from Smartband applications, we discuss, how the
      Privacy Policy Model (PPM), a fine-grained and modular expandable permission
      model, can be applied to this application area. This model is implemented in
      the Privacy Management Platform (PMP). Thus the outcomes of this work can be
      leveraged directly. Evaluation results underline the benefits of our work for
      Smartband applications.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INBOOK-2019-01&amp;engl=1}
}

