@inproceedings {INPROC-2020-57,
   author = {Simon Dreher and Peter Reimann and Christoph Gr{\"o}ger},
   title = {{Application Fields and Research Gaps of Process Mining in Manufacturing Companies}},
   booktitle = {Proceedings of INFORMATIK 2020},
   editor = {R. H. Reussner and A. Koziolek and R. Heinrich},
   publisher = {GI Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {621--634},
   type = {Conference Paper},
   month = {October},
   year = {2020},
   keywords = {Process Mining; Application; Production; Manufacturing; SCOR; Literature Review},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {To survive in global competition with increasing cost pressure, manufacturing
      companies must continuously optimize their manufacturing-related processes.
      Thereby, process mining constitutes an important data-driven approach to gain a
      profound understanding of the actual processes and to identify optimization
       potentials by applying data mining and machine learning techniques to event
      data. However, there is little knowledge about the feasibility and usefulness
      of process mining specifically in manufacturing companies. Hence, this paper
      provides an overview of potential applications of process mining for the
      analysis of manufacturing-related processes. We conduct a systematic literature
      review, classify relevant articles according to the
      Supply-Chain-Operations-Reference-Model (SCOR-model), identify research gaps,
      such as domain-specific challenges regarding unstructured, cascaded and
       non-linear processes or heterogeneous data sources, and give practitioners
       inspiration as to which manufacturing-related processes can be analyzed by process
      mining techniques.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-57\&engl=1}
}

@inproceedings {INPROC-2020-56,
   author = {Christian Weber and Peter Reimann},
   title = {{MMP - A Platform to Manage Machine Learning Models in Industry 4.0 Environments}},
   booktitle = {Proceedings of the IEEE 24th International Enterprise Distributed Object Computing Workshop (EDOCW)},
   address = {Eindhoven, The Netherlands},
   publisher = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Demonstration},
   month = {July},
   year = {2020},
   keywords = {Model Management; Machine Learning; Collaborative Data Science},
   language = {English},
   cr-category = {H.3.4 Information Storage and Retrieval Systems and Software},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {In manufacturing environments, machine learning models are being built for
      several use cases, such as predictive maintenance and product quality control.
      In this context, the various manufacturing processes, machines, and product
       variants make it necessary to create and use many different machine learning
      models. This calls for a software system that is able to manage all these
      diverse machine learning models and associated metadata. However, current model
      management systems do not associate models with business and domain context to
      provide non-expert users with tailored functions for model search and
      discovery. Moreover, none of the existing systems provides a comprehensive
      overview of all models within an organization. In our demonstration, we present
      the MMP, our model management platform that addresses these issues. The MMP
      provides a model metadata extractor, a model registry, and a context manager to
      store model metadata in a central metadata store. On top of this, the MMP
      provides frontend components that offer the above-mentioned functionalities. In
      our demonstration, we show two scenarios for model management in Industry 4.0
      environments that illustrate the novel functionalities of the MMP. We
      demonstrate to the audience how the platform and its metadata, linking models
      to their business and domain context, help non-expert users to search and
      discover models. Furthermore, we show how to use MMP's powerful visualizations
      for model reporting, such as a dashboard and a model landscape view.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-56\&engl=1}
}

@inproceedings {INPROC-2020-55,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{A Zone Reference Model for Enterprise-Grade Data Lake Management}},
   booktitle = {Proceedings of the 24th IEEE International Enterprise Distributed Object Computing Conference (EDOC)},
   publisher = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {57--66},
   type = {Conference Paper},
   month = {October},
   year = {2020},
   keywords = {Data Lake; Zones; Reference Model; Industry Case; Industry Experience},
   language = {English},
   cr-category = {H.4 Information Systems Applications},
   contact = {Senden Sie eine E-Mail an corinna.giebler@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Data lakes are on the rise as data platforms for any kind of analytics, from
      data exploration to machine learning. They achieve the required flexibility by
      storing heterogeneous data in their raw format, and by avoiding the need for
      pre-defined use cases. However, storing only raw data is inefficient, as for
      many applications, the same data processing has to be applied repeatedly. To
      foster the reuse of processing steps, literature proposes to store data in
      different degrees of processing in addition to their raw format. To this end,
       data lakes are typically structured in zones. Various zone models exist, but
       they are vague, and no assessments of them are given. It is unclear which
      of these zone models is applicable in a practical data lake implementation in
      enterprises. In this work, we assess existing zone models using requirements
      derived from multiple representative data analytics use cases of a real-world
      industry case. We identify the shortcomings of existing work and develop a zone
      reference model for enterprise-grade data lake management in a detailed manner.
      We assess the reference model's applicability through a prototypical
      implementation for a real-world enterprise data lake use case. This assessment
      shows that the zone reference model meets the requirements relevant in practice
      and is ready for industry use.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-55\&engl=1}
}

@inproceedings {INPROC-2020-54,
   author = {Manuel Fritz and Dennis Tschechlov and Holger Schwarz},
   title = {{Learning from Past Observations: Meta-Learning for Efficient Clustering Analyses}},
   booktitle = {Proceedings of the 22nd International Conference on Big Data Analytics and Knowledge Discovery (DaWaK 2020)},
   editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
   publisher = {Springer, Cham},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Computer Science},
   volume = {12393},
   pages = {364--379},
   type = {Conference Paper},
   month = {September},
   year = {2020},
   isbn = {978-3-030-59065-9},
   doi = {10.1007/978-3-030-59065-9_28},
   language = {English},
   cr-category = {H.3.3 Information Search and Retrieval},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Many clustering algorithms require the number of clusters as input parameter
       prior to execution. Since the ``best'' number of clusters is most often
      unknown in advance, analysts typically execute clustering algorithms multiple
      times with varying parameters and subsequently choose the most promising
      result. Several methods for an automated estimation of suitable parameters have
      been proposed. Similar to the procedure of an analyst, these estimation methods
      draw on repetitive executions of a clustering algorithm with varying
      parameters. However, when working with voluminous datasets, each single
       execution tends to be very time-consuming. Especially in today's Big Data
      era, such a repetitive execution of a clustering algorithm is not feasible for
      an efficient exploration. We propose a novel and efficient approach to
      accelerate estimations for the number of clusters in datasets. Our approach
      relies on the idea of meta-learning and terminates each execution of the
      clustering algorithm as soon as an expected qualitative demand is met. We show
      that this new approach is generally applicable, i.e., it can be used with
      existing estimation methods. Our comprehensive evaluation reveals that our
      approach is able to speed up the estimation of the number of clusters by an
      order of magnitude, while still achieving accurate estimates.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-54\&engl=1}
}

@inproceedings {INPROC-2020-53,
   author = {Manuel Fritz and Michael Behringer and Holger Schwarz},
   title = {{LOG-Means: Efficiently Estimating the Number of Clusters in Large Datasets}},
   booktitle = {Proceedings of the 46th International Conference on Very Large Data Bases (VLDB)},
   editor = {Magdalena Balazinska and Xiaofang Zhou},
   publisher = {ACM Digital Library},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Proceedings of the VLDB Endowment},
   volume = {13(12)},
   pages = {2118--2131},
   type = {Conference Paper},
   month = {August},
   year = {2020},
   issn = {2150-8097},
   doi = {10.14778/3407790.3407813},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Clustering is a fundamental primitive in manifold applications. In order to
      achieve valuable results, parameters of the clustering algorithm, e.g., the
      number of clusters, have to be set appropriately, which is a tremendous
      pitfall. To this end, analysts rely on their domain knowledge in order to
      define parameter search spaces. While experienced analysts may be able to
      define a small search space, especially novice analysts often define rather
      large search spaces due to the lack of in-depth domain knowledge. These search
      spaces can be explored in different ways by estimation methods for the number
      of clusters. In the worst case, estimation methods perform an exhaustive search
      in the given search space, which leads to infeasible runtimes for large
      datasets and large search spaces. We propose LOG-Means, which is able to
      overcome these issues of existing methods. We show that LOG-Means provides
      estimates in sublinear time regarding the defined search space, thus being a
      strong fit for large datasets and large search spaces. In our comprehensive
      evaluation on an Apache Spark cluster, we compare LOG-Means to 13 existing
      estimation methods. The evaluation shows that LOG-Means significantly
      outperforms these methods in terms of runtime and accuracy. To the best of our
      knowledge, this is the most systematic comparison on large datasets and search
      spaces as of today.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-53\&engl=1}
}

@inproceedings {INPROC-2020-52,
   author = {Daniel Del Gaudio and Maximilian Reichel and Pascal Hirmer},
   title = {{A Life Cycle Method for Device Management in Dynamic IoT Environments}},
   booktitle = {Proceedings of the 5th International Conference on Internet of Things, Big Data and Security - Volume 1: IoTBDS},
   publisher = {SciTePress Digital Library},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {46--56},
   type = {Conference Paper},
   month = {May},
   year = {2020},
   keywords = {Internet of Things, Discovery, Device Integration, Decentralization},
   language = {English},
   cr-category = {C.2.4 Distributed Systems},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {In the Internet of Things, interconnected devices communicate with each other
      through standardized internet protocols to reach common goals. By doing so,
       they enable building complex, self-organizing applications, such as Smart
       Cities or Smart Factories. Especially in large IoT environments, newly
      appearing devices as well as leaving or failing IoT devices are a great
      challenge. New devices need to be integrated into the application whereas
       failing devices need to be dealt with. In a Smart City, actors such as
       smartphones or connected cars appear and disappear all the time. Dealing
       with these dynamics is a great challenge, especially when done automatically.
      Consequently, in this paper, we introduce A Life Cycle Method for Device
      Management in Dynamic IoT Environments. This method enables integrating newly
      appearing IoT devices into IoT applications and, furthermore, offers means to
      cope with failing devices. Our approach is evaluated through a system
       architecture and a corresponding prototypical implementation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-52\&engl=1}
}

@inproceedings {INPROC-2020-51,
   author = {Daniel Del Gaudio and Pascal Hirmer},
   title = {{Fulfilling the IoT Vision: Are We There Yet?}},
   booktitle = {Proceedings of the 5th International Conference on Internet of Things, Big Data and Security - Volume 1: IoTBDS},
   publisher = {SciTePress Digital Library},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {367--374},
   type = {Conference Paper},
   month = {May},
   year = {2020},
   isbn = {978-989-758-426-8},
   keywords = {Internet of Things, Decentralized, Autonomous, Dynamic, Smart},
   language = {English},
   cr-category = {C.2 Computer-Communication Networks},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The vision of the Internet of Things is enabling self-controlled and
       decentralized environments, in which hardware devices equipped with sensors
       and actuators communicate with each other through standardized internet
      protocols to reach common goals. The device-to-device communication should be
      decentralized and should not necessarily require human interaction. However,
      enabling such complex IoT applications, e.g., connected cars, is a big
      challenge, since many requirements need to be fulfilled. These requirements
       include, for example, security, privacy, timely data processing, uniform
      communication standards, or location-awareness. Based on an intensive
      literature review, in this overview paper, we define requirements for such
      environments and, in addition, we discuss whether they are fulfilled by
      state-of-the-art approaches or whether there still has to be work done in the
      future. We conclude this paper by illustrating research gaps that have to be
      filled in order to realize the IoT vision.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-51\&engl=1}
}

@inproceedings {INPROC-2020-48,
   author = {Dennis Przytarski and Christoph Stach and Cl{\'e}mentine Gritti and Bernhard Mitschang},
   title = {{A Blueprint for a Trustworthy Health Data Platform Encompassing IoT and Blockchain Technologies}},
   booktitle = {Proceedings of the ISCA 29th International Conference on Software Engineering and Data Engineering (Las Vegas, October 2020)},
   publisher = {ISCA in Cooperation with IEEE Computer Society},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--10},
   type = {Conference Paper},
   month = {October},
   year = {2020},
   language = {English},
   cr-category = {H.2.7 Database Administration,
                   K.6.5 Security and Protection},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {eHealth provides great relief for patients and physicians. This means that patients
      autonomously monitor their condition via IoT medical devices and make these
      data available to physicians for analyses. This requires a data platform that
      takes care of data acquisition, management, and provisioning. As health data
      are highly sensitive, there are major concerns regarding data security with
      respect to confidentiality, integrity, and authenticity. To this end, we
      present a blueprint for constructing a trustworthy health data platform called
       SEAL. It provides a lightweight attribute-based authentication mechanism for
       IoT devices to validate all involved data sources, a fine-grained data
       provisioning system to enable data provision according to actual requirements,
       and a verification procedure to ensure that data cannot be manipulated.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-48\&engl=1}
}

@inproceedings {INPROC-2020-45,
   author = {Rebecca Eichler and Corinna Giebler and Christoph Gr{\"o}ger and Holger Schwarz and Bernhard Mitschang},
   title = {{HANDLE - A Generic Metadata Model for Data Lakes}},
   booktitle = {Big Data Analytics and Knowledge Discovery},
   editor = {Min Song and Il-Yeol Song and Gabriele Kotsis and A Min Tjoa and Ismail Khalil},
   publisher = {Springer Nature Switzerland AG},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Computer Science},
   volume = {12393},
   pages = {73--88},
   type = {Conference Paper},
   month = {September},
   year = {2020},
   doi = {10.1007/978-3-030-59065-9_7},
   keywords = {Metadata management; Metadata model; Data lake},
   language = {English},
   cr-category = {H.2 Database Management},
   ee = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/INPROC-2020-45/INPROC-2020-45.pdf},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The substantial increase in generated data induced the development of new
      concepts such as the data lake. A data lake is a large storage repository
      designed to enable flexible extraction of the data's value. A key aspect of
      exploiting data value in data lakes is the collection and management of
      metadata. To store and handle the metadata, a generic metadata model is
      required that can reflect metadata of any potential metadata management use
       case, e.g., data versioning or data lineage. However, an evaluation of existing
       metadata models shows that none so far are sufficiently generic. In this work,
      we present HANDLE, a generic metadata model for data lakes, which supports the
      flexible integration of metadata, data lake zones, metadata on various granular
      levels, and any metadata categorization. With these capabilities HANDLE enables
      comprehensive metadata management in data lakes. We show HANDLE's feasibility
      through the application to an exemplary access-use-case and a prototypical
       implementation. A comparison with existing models shows that HANDLE can
      reflect the same information and provides additional capabilities needed for
      metadata management in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-45\&engl=1}
}

@inproceedings {INPROC-2020-39,
   author = {Ana Cristina Franco da Silva and Pascal Hirmer and Jan Schneider and Seda Ulusal and Matheus Tavares Frigo},
   title = {{MBP: Not just an IoT Platform}},
   booktitle = {2020 IEEE International Conference on Pervasive Computing and Communications Workshops (PerCom Workshops)},
   publisher = {Institute of Electrical and Electronics Engineers, Inc. (IEEE)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--3},
   type = {Demonstration},
   month = {August},
   year = {2020},
   isbn = {978-1-7281-4716-1},
   doi = {10.1109/PerComWorkshops48775.2020.9156156},
   keywords = {Internet of Things; Sensor Integration; IoT environments; Complex Event Processing},
   language = {English},
   cr-category = {H.0 Information Systems General},
   ee = {https://ieeexplore.ieee.org/document/9156156},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {In this demonstration paper, we introduce the Multi-purpose Binding and
      Provisioning Platform (MBP), an open-source IoT platform developed for easy
      binding, provisioning, and management of IoT environments. Furthermore, the MBP
      enables the simple realization of IoT applications, such as heating,
      ventilation, air conditioning (HVAC) systems, by allowing users to create rules
      for the IoT environment, in a straightforward and event-condition-action
       fashion. Efficient and timely data processing in IoT environments is ensured
       through underlying complex event processing technologies.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-39\&engl=1}
}

@inproceedings {INPROC-2020-38,
   author = {Alejandro Villanueva Zacarias and Rachaa Ghabri and Peter Reimann},
   title = {{AD4ML: Axiomatic Design to Specify Machine Learning Solutions for Manufacturing}},
   booktitle = {Proceedings of the 21st International Conference on Information Reuse and Integration for Data Science},
   publisher = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = {August},
   year = {2020},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   keywords = {manufacturing; machine-learning; design},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Machine learning is increasingly adopted in manufacturing use cases, e.g., for
      fault detection in a production line. Each new use case requires developing its
       own machine learning (ML) solution. An ML solution integrates different software
      components to read, process, and analyze all use case data, as well as to
      finally generate the output that domain experts need for their decision-making.
       The process of designing a system specification for an ML solution is not
       straightforward. It entails two types of complexity: (1) the technical
      complexity of selecting combinations of ML algorithms and software components
      that suit a use case; (2) the organizational complexity of integrating
      different requirements from a multidisciplinary team of, e.g., domain experts,
      data scientists, and IT specialists. In this paper, we propose several
      adaptations to Axiomatic Design in order to design ML solution specifications
      that handle these complexities. We call this Axiomatic Design for Machine
       Learning (AD4ML). We apply AD4ML to specify an ML solution for a fault detection
       use case and discuss to what extent our approach addresses the above-mentioned
      complexities. We also discuss how AD4ML facilitates the agile design of ML
      solutions.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-38\&engl=1}
}

@inproceedings {INPROC-2020-37,
   author = {Mathias Mormul and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
   title = {{DEAR: Distributed Evaluation of Alerting Rules}},
   booktitle = {IEEE 13th International Conference on Cloud Computing (CLOUD)},
   publisher = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--1},
   type = {Conference Paper},
   month = {December},
   year = {2020},
   keywords = {cloud monitoring; agent-based; alerting},
   language = {English},
   cr-category = {H.0 Information Systems General},
   contact = {mathias.mormul@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Cloud computing passed the hype cycle long ago and has since firmly
       established itself as a key technology. However, to utilize the cloud as
       cost-efficiently as possible, continuous monitoring is key to preventing an
       over- or under-commissioning of resources. In large-scale scenarios, several
      challenges for cloud monitoring, such as high network traffic volume, low
       accuracy of monitoring data, and high time-to-insight, require new approaches in
      IT Operations while considering administrative complexity. To handle these
      challenges, we present DEAR, the Distributed Evaluation of Alerting Rules. DEAR
      is a plugin for monitoring systems which automatically distributes alerting
      rules to the monitored resources to solve the trade-off between high accuracy
      and low network traffic volume without administrative overhead. We evaluate our
       approach against requirements of today's IT monitoring and compare it to
      conventional agent-based monitoring approaches.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-37\&engl=1}
}

@inproceedings {INPROC-2020-32,
   author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
   title = {{Exploiting Domain Knowledge to Address Multi-Class Imbalance and a Heterogeneous Feature Space in Classification Tasks for Manufacturing Data}},
   booktitle = {Proceedings of the 46th International Conference on Very Large Data Bases (VLDB)},
   editor = {Magdalena Balazinska and Xiaofang Zhou},
   publisher = {ACM Digital Library},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Proceedings of the VLDB Endowment},
   volume = {13(12)},
   type = {Conference Paper},
   month = {August},
   year = {2020},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Classification techniques are increasingly adopted for quality control in
       manufacturing, e.g., to help domain experts identify the cause of quality
      issues of defective products. However, real-world data often imply a set of
      analytical challenges, which lead to a reduced classification performance.
      Major challenges are a high degree of multi-class imbalance within data and a
      heterogeneous feature space that arises from the variety of underlying
      products. This paper considers such a challenging use case in the area of
       End-of-Line testing, i.e., the final functional test of complex products.
      Existing solutions to classification or data pre-processing only address
      individual analytical challenges in isolation. We propose a novel
      classification system that explicitly addresses both challenges of multi-class
      imbalance and a heterogeneous feature space together. As main contribution,
      this system exploits domain knowledge to systematically prepare the training
      data. Based on an experimental evaluation on real-world data, we show that our
      classification system outperforms any other classification technique in terms
      of accuracy. Furthermore, we can reduce the amount of rework required to solve
      a quality issue of a product.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-32\&engl=1}
}

@inproceedings {INPROC-2020-31,
   author = {Yannick Wilhelm and Peter Reimann and Wolfgang Gauchel and Bernhard Mitschang},
   title = {{Overview on Hybrid Approaches to Fault Detection and Diagnosis: Combining Data-driven, Physics-based and Knowledge-based Models}},
   booktitle = {Procedia CIRP: Proceedings of the 14th CIRP Conference on Intelligent Computation in Manufacturing Engineering (CIRP ICME)},
   publisher = {Elsevier BV},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = {July},
   year = {2020},
   keywords = {Fault Detection; Fault Diagnosis; Hybrid Methods; Diagnostics and Maintenance; Knowledge-driven Methods; Machine Learning},
   language = {English},
   cr-category = {H.2.8 Database Applications,
                   I.2.1 Applications and Expert Systems},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {In this paper, we review hybrid approaches for fault detection and fault
      diagnosis (FDD) that combine data-driven analysis with physics-based and
      knowledge-based models to overcome a lack of data and to increase the FDD
      accuracy. We categorize these hybrid approaches according to the steps of an
      extended common workflow for FDD. This gives practitioners indications of which
      kind of hybrid FDD approach they can use in their application.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-31\&engl=1}
}

@inproceedings {INPROC-2020-20,
   author = {Yannick Wilhelm and Ulf Schreier and Peter Reimann and Bernhard Mitschang and Holger Ziekow},
   title = {{Data Science Approaches to Quality Control in Manufacturing: A Review of Problems, Challenges and Architecture}},
   booktitle = {Springer Proceedings Series Communications in Computer and Information Science (CCIS)},
   publisher = {Springer},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = {October},
   year = {2020},
   keywords = {Data Science; Machine Learning; Quality Control; Challenges; Functional Architecture},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Manufacturing environments are characterized by non-stationary processes,
      constantly varying conditions, complex process interdependencies, and a high
      number of product variants. These and other aspects pose several challenges for
      common machine learning algorithms to achieve reliable and accurate
      predictions. This overview and vision paper provides a comprehensive list of
      common problems and challenges for data science approaches to quality control
      in manufacturing. We have derived these problems and challenges by inspecting
       three real-world use cases in the field of product quality control and via a
       comprehensive literature study. We furthermore associate the identified problems
      and challenges to individual layers and components of a functional setup, as it
      can be found in manufacturing environments today. Additionally, we extend and
      revise this functional setup and this way propose our vision of a future
      functional software architecture. This functional architecture represents a
      visionary blueprint for solutions that are able to address all challenges for
      data science approaches in manufacturing quality control.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-20\&engl=1}
}

@inproceedings {INPROC-2020-19,
   author = {Christian Weber and Pascal Hirmer and Peter Reimann},
   title = {{A Model Management Platform for Industry 4.0 - Enabling Management of Machine Learning Models in Manufacturing Environments}},
   booktitle = {Proceedings of the 23rd International Conference on Business Information Systems (BIS)},
   editor = {Witold Abramowicz and Rainer Alt and Gary Klein and Adrian Paschke and Kurt Sandkuhl},
   publisher = {Springer International Publishing},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Business Information Processing},
   type = {Conference Paper},
   month = {November},
   year = {2020},
   issn = {1865-1348},
   keywords = {Model Management; Machine Learning; Metadata Tracking},
   language = {English},
   cr-category = {H.3.4 Information Storage and Retrieval Systems and Software},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Industry 4.0 use cases such as predictive maintenance and product quality
       control make it necessary to create, use and maintain a multitude of different
      machine learning models. In this setting, model management systems help to
      organize models. However, concepts for model management systems currently focus
      on data scientists, but do not support non-expert users such as domain experts
       and business analysts. Thus, it is difficult for them to reuse existing models
      for their use cases. In this paper, we address these challenges and present an
      architecture, a metadata schema and a corresponding model management platform.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-19\&engl=1}
}

@inproceedings {INPROC-2020-18,
   author = {Julian Ziegler and Peter Reimann and Florian Keller and Bernhard Mitschang},
   title = {{A Graph-based Approach to Manage CAE Data in a Data Lake}},
   booktitle = {Procedia CIRP: Proceedings of the 53rd CIRP Conference on Manufacturing Systems (CIRP CMS 2020)},
   publisher = {Elsevier},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = {July},
   year = {2020},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Computer-aided engineering (CAE) applications generate vast quantities of
      heterogeneous data. Domain experts often fail to explore and analyze these
       data, because they are not integrated across different applications. Existing
      data management solutions are rather tailored to scientific applications. In
      our approach, we tackle this issue by combining a data lake solution with
      graph-based metadata management. This provides a holistic view of all CAE data
      and of the data-generating applications in one interconnected structure. Based
      on a prototypical implementation, we discuss how this eases the task of domain
      experts to explore and extract data for further analyses.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-18\&engl=1}
}

@inproceedings {INPROC-2020-17,
   author = {Vitali Hirsch and Peter Reimann and Bernhard Mitschang},
   title = {{Incorporating Economic Aspects into Recommendation Ranking to Reduce Failure Costs}},
   booktitle = {Procedia CIRP: Proceedings of the 53rd CIRP Conference on Manufacturing Systems (CIRP CMS 2020)},
   publisher = {Elsevier},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = {July},
   year = {2020},
   keywords = {decision support; predictive analytics; quality control; End-of-Line testing; classification; fault isolation; failure costs},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Machine learning approaches for manufacturing usually offer recommendation
      lists, e.g., to support humans in fault diagnosis. For instance, if a product
      does not pass the final check after the assembly, a recommendation list may
      contain likely faulty product components to be replaced. Thereby, the list
      ranks these components using their probabilities. However, these probabilities
      often di er marginally, while economic impacts, e.g., the costs for replacing
      components, di er significantly. We address this issue by proposing an approach
      that incorporates costs to re-rank a list. Our evaluation shows that this
      approach reduces fault-related costs when using recommendation lists to support
      human labor.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-17\&engl=1}
}

@inproceedings {INPROC-2020-11,
   author = {Mathias Mormul and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
   title = {{Avoiding Vendor-Lockin in Cloud Monitoring using Generic Agent Templates}},
   booktitle = {Proceedings of the 23rd International Conference on Business Information Systems (BIS), 2020},
   publisher = {Springer},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--1},
   type = {Conference Paper},
   month = {June},
   year = {2020},
   keywords = {Vendor Lock-in; Cloud monitoring; Monitoring agents; Genericity},
   language = {English},
   cr-category = {H.4.0 Information Systems Applications General},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Cloud computing passed the hype cycle long ago and has since firmly
       established itself as a key technology. However, to utilize the cloud optimally, and
       therefore as cost-efficiently as possible, continuous monitoring is key to
       preventing an over- or under-commissioning of resources. However, selecting a
      suitable monitoring solution is a challenging task. Monitoring agents that
      collect monitoring data are spread across the monitored IT environment.
      Therefore, the possibility of vendor lock-ins leads to a lack of flexibility
      when the cloud environment or the business needs change. To handle these
       challenges, we introduce \textit{generic agent templates} that are applicable
      to many monitoring systems and support a replacement of monitoring systems.
       Solution-specific technical details of monitoring agents are abstracted away,
       and system administrators only need to model generic agents, which can be
      transformed into solution-specific monitoring agents. The transformation logic
      required for this process is provided by domain experts to not further burden
      system administrators. Furthermore, we introduce an agent lifecycle to support
      the system administrator with the management and deployment of generic agents.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-11\&engl=1}
}

@inproceedings {INPROC-2020-08,
   author = {Mathias Mormul and Christoph Stach},
   title = {{A Context Model for Holistic Monitoring and Management of Complex IT Environments}},
   booktitle = {Proceedings of the 2020 IEEE International Conference on Pervasive Computing and Communications Workshops (CoMoRea)},
   publisher = {IEEE Computer Society},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--1},
   type = {Workshop Paper},
   month = {March},
   year = {2020},
   keywords = {Context Model; IT Operations; AIOps; Monitoring},
   language = {English},
   cr-category = {C.0 Computer Systems Organization, General},
   contact = {Send an e-mail to mathias.mormul@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The increased usage of IoT, containerization, and multiple clouds not only
       changed the way IT works but also the way IT Operations, i.e., the monitoring
      and management of IT assets, works. Monitoring a complex IT environment leads
      to massive amounts of heterogeneous context data, usually spread across
       multiple data silos, which need to be analyzed and acted upon autonomously.
       However, for a holistic overview of the IT environment, context data need to
       be consolidated, which leads to several problems. For scalable and automated
      processes, it is essential to know what context is required for a given
      monitored resource, where the context data are originating from, and how to
      access them across the data silos. Therefore, we introduce the Monitoring
      Resource Model for the holistic management of context data. We show what
      context is essential for the management of monitored resources and how it can
      be used for context reasoning. Furthermore, we propose a multi-layered
      framework for IT Operations with which we present the benefits of the
      Monitoring Resource Model.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-08\&engl=1}
}

@inproceedings {INPROC-2020-07,
   author = {Christoph Stach and Cl{\'e}mentine Gritti and Dennis Przytarski and Bernhard Mitschang},
   title = {{Trustworthy, Secure, and Privacy-aware Food Monitoring Enabled by Blockchains and the IoT}},
   booktitle = {Proceedings of the 18th Annual IEEE International Conference on Pervasive Computing and Communications Workshops (PerCom Workshops), 23-27 March, 2020, Austin, Texas, USA},
   publisher = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--4},
   type = {Conference Paper},
   month = {March},
   year = {2020},
   keywords = {Attribute-based Credentials; Blockchain; Data Authentication; IoT; Privacy; Service Utility; Transparency; Trust},
   language = {English},
   cr-category = {K.6.5 Security and Protection,
                   D.4.6 Operating Systems Security and Protection},
   contact = {Send an e-mail to Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {A large number of food scandals (e.g., falsely declared meat or non-compliance
      with hygiene regulations) are causing considerable concern to consumers.
      Although Internet of Things (IoT) technologies are used in the food industry to
      monitor production (e.g., for tracing the origin of meat or monitoring cold
      chains), the gathered data are not used to provide full transparency to the
      consumer. To achieve this, however, three aspects must be considered: a) The
      origin of the data must be verifiable, i.e., it must be ensured that the data
       originate from calibrated sensors. b) The data must be stored in a
       tamper-resistant, immutable manner and be open to all consumers. c) Despite
       this openness, the privacy of
      affected data subjects (e.g., the carriers) must still be protected. To this
       end, we introduce the SHEEPDOG architecture that ``shepherds'' products from
      production to purchase to enable a trustworthy, secure, and privacy-aware food
      monitoring. In SHEEPDOG, attribute-based credentials ensure trustworthy data
      acquisition, blockchain technologies provide secure data storage, and
      fine-grained access control enables privacy-aware data provision.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-07\&engl=1}
}

@inproceedings {INPROC-2020-06,
   author = {Cornelia Kiefer and Peter Reimann and Bernhard Mitschang},
   title = {{Prevent Low-Quality Analytics by Automatic Selection of the Best-Fitting Training Data}},
   booktitle = {Proceedings of the 53rd Hawaii International Conference on System Sciences (HICSS)},
   address = {Maui, Hawaii, USA},
   publisher = {Online},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1036--1045},
   type = {Conference Paper},
   month = {January},
   year = {2020},
   isbn = {978-0-9981331-3-3},
   keywords = {data quality; domain-specific data analysis; text analysis; text similarity; training data},
   language = {English},
   cr-category = {I.2.7 Natural Language Processing},
   ee = {https://scholarspace.manoa.hawaii.edu/bitstream/10125/63868/0103.pdf},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Data analysis pipelines consist of a sequence of various analysis tools. Most
      of these tools are based on supervised machine learning techniques and thus
      rely on labeled training data. Selecting appropriate training data has a
       crucial impact on analytics quality. Yet, most of the time, domain experts who
      construct analysis pipelines neglect the task of selecting appropriate training
      data. They rely on default training data sets, e.g., since they do not know
      which other training data sets exist and what they are used for. Yet, default
      training data sets may be very different from the domain-specific input data
      that is to be analyzed, leading to low-quality results. Moreover, these input
      data sets are usually unlabeled. Thus, information on analytics quality is not
      measurable with evaluation metrics. Our contribution comprises a method that
      (1) indicates the expected quality to the domain expert while constructing the
      analysis pipeline, without need for labels and (2) automatically selects the
      best-fitting training data. It is based on a measurement of the similarity
      between input and training data. In our evaluation, we consider the
      part-of-speech tagger tool and show that Latent Semantic Analysis (LSA) and
      Cosine Similarity are suited as indicators for the quality of analysis results
      and as basis for an automatic selection of the best-fitting training data.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-06\&engl=1}
}

@inproceedings {INPROC-2020-04,
   author = {Christoph Stach and Cl{\'e}mentine Gritti and Bernhard Mitschang},
   title = {{Bringing Privacy Control back to Citizens: DISPEL - A Distributed Privacy Management Platform for the Internet of Things}},
   booktitle = {Proceedings of the 35th ACM/SIGAPP Symposium on Applied Computing (SAC)},
   address = {Brno, Czech Republic},
   publisher = {ACM Press},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--8},
   type = {Conference Paper},
   month = {March},
   year = {2020},
   keywords = {privacy; IoT; authorization concept; attribute-based access control},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   D.4.6 Operating Systems Security and Protection},
   contact = {Send an e-mail to Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The Internet of Things (IoT) is becoming increasingly popular. It enables a
      variety of novel applications. Such applications require a lot of data about
      their users. To this end, sensors continuously monitor various aspects of daily
      life. Despite the indisputable benefits of IoT applications, this is a severe
      privacy threat. Due to the GDPR coming into force, there is a need for action
      on the part of IoT vendors. In this paper, we therefore introduce a Privacy by
      Design approach for IoT applications called DISPEL. It provides a configuration
       method enabling users to specify globally which application may access what
      data for which purpose. Privacy protection is then applied at the earliest
      stage possible, i.e., directly on the IoT devices generating the data. Data
      transmission is protected against unauthorized access and manipulation.
      Evaluation results show that DISPEL fulfills the requirements towards an IoT
      privacy system.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-04\&engl=1}
}

@inproceedings {INPROC-2020-03,
   author = {Christoph Stach and Corinna Giebler and Manuela Wagner and Christian Weber and Bernhard Mitschang},
   title = {{AMNESIA: A Technical Solution towards GDPR-compliant Machine Learning}},
   booktitle = {Proceedings of the 6th International Conference on Information Systems Security and Privacy (ICISSP 2020)},
   editor = {Steven Furnell and Paolo Mori and Edgar Weippl and Olivier Camp},
   address = {Valletta, Malta},
   publisher = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--12},
   type = {Conference Paper},
   month = {February},
   year = {2020},
   keywords = {Machine Learning; Data Protection; Privacy Zones; Access Control; Model Management; Provenance; GDPR},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   I.5.1 Pattern Recognition Models},
   contact = {Send an e-mail to Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Machine Learning (ML) applications are becoming increasingly valuable due to
      the rise of IoT technologies. That is, sensors continuously gather data from
      different domains and make them available to ML for learning its models. This
      provides profound insights into the data and enables predictions about future
      trends. While ML has many advantages, it also represents an immense privacy
      risk. Data protection regulations such as the GDPR address such privacy
      concerns, but practical solutions for the technical enforcement of these laws
      are also required. Therefore, we introduce AMNESIA, a privacy-aware machine
      learning model provisioning platform. AMNESIA is a holistic approach covering
       all stages from data acquisition to model provisioning. This makes it possible to control
      which application may use which data for ML as well as to make models ``forget''
      certain knowledge.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2020-03\&engl=1}
}

@article {ART-2020-20,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen - Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank Spektrum},
   publisher = {Springer},
   volume = {20},
   pages = {57--69},
   type = {Article in Journal},
   month = {January},
   year = {2020},
   keywords = {Data Lakes; Analytics; State of the Art; Challenges; Practical Example},
   language = {German},
   cr-category = {H.4 Information Systems Applications},
   contact = {Send an e-mail to Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Companies increasingly face the challenge of managing large, heterogeneous
       data and extracting the value contained therein. In recent years, the data
       lake has therefore emerged as a novel concept for managing and using such
       complex data. However, when companies want to put such a data lake into
       practice, they encounter manifold challenges, such as contradictions in the
       definition or vague and missing concepts. In this article, concrete projects
       of a globally operating industrial company are used to identify existing
       challenges and to derive requirements for data lakes. These requirements are
       compared with the available literature on data lakes as well as with existing
       approaches from research. This comparison shows that five major research gaps
       exist: 1. unclear data modeling methods, 2. missing data lake reference
       architecture, 3. incomplete metadata management concept, 4. incomplete data
       lake governance concept, 5. missing holistic realization strategy.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-20&amp;engl=1}
}

@article {ART-2020-19,
   author = {Christoph Stach and Julia Br{\"a}cker and Rebecca Eichler and Corinna Giebler and Cl{\'e}mentine Gritti},
   title = {{How to Provide High-Utility Time Series Data in a Privacy-Aware Manner: A VAULT to Manage Time Series Data}},
   journal = {International Journal On Advances in Security},
   editor = {Hans-Joachim Hof and Birgit Gersbeck-Schierholz},
   publisher = {IARIA},
   volume = {13},
   number = {3\&4},
   pages = {1--21},
   type = {Article in Journal},
   month = {December},
   year = {2020},
   issn = {1942-2636},
   keywords = {Time Series Data; Privacy Filters; Aggregation; Interpolation; Smoothing; Information Emphasis; Noise; Data Quality; Authentication; Permission Model; Data Management},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   D.4.6 Operating Systems Security and Protection},
   contact = {Send an e-mail to Christoph.Stach@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Smart Services enrich many aspects of our daily lives, such as in the Ambient
      Assisted Living (AAL) domain, where the well-being of patients is automatically
      monitored, and patients have more autonomy as a result. A key enabler for such
      services is the Internet of Things (IoT). Using IoT-enabled devices, large
      amounts of (partly private) data are continuously captured, which can then be
      gathered and analyzed by Smart Services. Although these services bring many
      conveniences, they also pose a serious threat to privacy. In order to provide
      the highest quality of service, they need access to as much data as possible
      and even reveal more private information due to in-depth data analyses. To
      ensure privacy, however, data minimization is required. Users are thus forced
      to balance between service quality and privacy. Current IoT privacy approaches
      do not reflect this discrepancy properly. Furthermore, as users are often not
      experienced in the proper handling of privacy mechanisms, this leads to overly
      restrictive behavior. Instead of charging users with privacy control, we
      introduce VAULT, a novel approach towards a privacy-aware management of
      sensitive data. Since time series data have a special position in the IoT,
      VAULT is particularly tailored to this kind of data. It attempts to achieve
      the best possible tradeoff between service quality and privacy for each user.
      To this end, VAULT manages the data and enables a demand-based and
      privacy-aware provision of the data, by applying appropriate privacy filters
      which fulfill not only the quality requirements of the Smart Services but also
      the privacy requirements of users. In doing so, VAULT pursues a Privacy by
      Design approach.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-19&amp;engl=1}
}

@article {ART-2020-04,
   author = {Corinna Giebler and Christoph Gr{\"o}ger and Eva Hoos and Rebecca Eichler and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Lakes auf den Grund gegangen: Herausforderungen und Forschungsl{\"u}cken in der Industriepraxis}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer},
   volume = {20},
   number = {1},
   pages = {57--69},
   type = {Article in Journal},
   month = {January},
   year = {2020},
   doi = {10.1007/s13222-020-00332-0},
   keywords = {Data Lake; Analytics; State of the Art; Challenges; Practical Example},
   language = {German},
   cr-category = {A.1 General Literature, Introductory and Survey,
                   E.0 Data General},
   ee = {https://rdcu.be/b0WM8},
   contact = {Send an e-mail to Corinna.Giebler@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Companies increasingly face the challenge of managing large, heterogeneous
      data and extracting the value contained therein. In recent years, the data
      lake has therefore emerged as a novel concept for managing and exploiting
      such complex data. However, when companies want to put such a data lake
      into practice, they encounter manifold challenges, such as contradictory
      definitions or vague and missing concepts. In this article, concrete
      projects of a globally operating industrial company are used to identify
      existing challenges and to derive requirements for data lakes. These
      requirements are compared with the available literature on data lakes as
      well as with existing approaches from research. This comparison reveals
      five major research gaps: 1. unclear data modeling methods, 2. a missing
      data lake reference architecture, 3. an incomplete metadata management
      concept, 4. an incomplete data lake governance concept, 5. a missing
      holistic implementation strategy.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2020-04&amp;engl=1}
}

