% Conference paper in the volume "Service-Oriented Computing" (CCIS 1603, Springer), pp. 24--44, 2022.
@inproceedings {INPROC-2022-09,
   author = {Jan Schneider and Pascal Hirmer},
   title = {{Enhancing IoT Platforms for Autonomous Device Discovery and Selection}},
   booktitle = {Service-Oriented Computing},
   editor = {Johanna Barzen and Frank Leymann and Schahram Dustdar},
   publisher = {Springer International Publishing},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Communications in Computer and Information Science},
   volume = {1603},
   pages = {24--44},
   type = {Conference Paper},
   month = {October},
   year = {2022},
   isbn = {978-3-031-18304-1},
   doi = {10.1007/978-3-031-18304-1_2},
   keywords = {Internet of Things; IoT platforms; Device discovery},
   language = {English},
   cr-category = {C.2.1 Network Architecture and Design,
                   C.2.4 Distributed Systems},
   ee = {https://doi.org/10.1007/978-3-031-18304-1_2},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The Internet of Things (IoT) encompasses a variety of technologies that enable
      the formation of adaptive and flexible networks from heterogeneous devices.
      Along with the rising number of applications, the amount of devices within IoT
      ecosystems is constantly increasing. In order to cope with this inherent
      complexity and to enable efficient administration and orchestration of devices,
      IoT platforms have emerged in recent years. While many IoT platforms empower
      users to define application logic for use cases and execute it within an
      ecosystem, they typically rely on static device references, leading to huge
      manual maintenance efforts and low robustness. In this paper, we present an
      approach that allows IoT platforms to autonomously and reliably execute
      pre-defined use cases by automatically discovering and selecting the most
      suitable devices. It establishes loose coupling and hence does not impose major
      technical constraints on the ecosystems in which it is operated.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-09&engl=1}
}

% Workshop paper (Data Science for Data Marketplaces, held with the 48th VLDB conference), pp. 1--12, 2022.
% No publisher is recorded for these proceedings, so the optional publisher field is omitted.
@inproceedings {INPROC-2022-08,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{Establishing the Enterprise Data Marketplace: Characteristics, Architecture, and Challenges}},
   booktitle = {Proceedings of the Workshop on Data Science for Data Marketplaces in Conjunction with the 48th International Conference on Very Large Data Bases},
   editor = {Xiaohui Yu and Jian Pei},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--12},
   type = {Workshop Paper},
   month = {September},
   year = {2022},
   language = {English},
   cr-category = {E.m Data Miscellaneous,
                   H.3.7 Digital Libraries,
                   H.4.m Information Systems Applications Miscellaneous},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Companies today have increasing amounts of data at their disposal, most of
      which is not used, leaving the data value unexploited. In order to leverage the
      data value, the data must be democratized, i.e., made available to the company
      employees. In this context, the use of enterprise data marketplaces, platforms
      for trading data within a company, are proposed. However, specifics of
      enterprise data marketplaces and how these can be implemented have not been
      investigated in literature so far. To shed light on these topics, we illustrate
      the characteristics of an enterprise data marketplace and highlight according
      marketplace requirements. We provide an enterprise data marketplace
      architecture, discuss how it integrates into a company's system landscape and
      present an enterprise data marketplace prototype. Finally, we examine
      organizational and technical challenges which arise when operating a
      marketplace in the enterprise context. In this paper, we thereby present the
      enterprise data marketplace as a distinct marketplace type and provide the
      basis for establishing it within a company.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-08&engl=1}
}

% Conference paper at the 12th International Conference on the Internet of Things (ACM), pp. 135--138, 2022.
@inproceedings {INPROC-2022-07,
   author = {Yunxuan Li and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
   title = {{Ensuring Situation-Aware Privacy for Connected Vehicles}},
   booktitle = {Proceedings of the 12th International Conference on the Internet of Things (IoT); Delft, Netherlands, November 7 - 10, 2022},
   editor = {Evangelos Niforatos and Gerd Kortuem and Nirvana Meratnia and Josh Siegel and Florian Michahelles},
   address = {New York, NY, USA},
   publisher = {ACM},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {135--138},
   type = {Conference Paper},
   month = {November},
   year = {2022},
   isbn = {978-1-4503-9665-3},
   doi = {10.1145/3567445.3569163},
   keywords = {Connected Vehicle; Situation-Awareness; Privacy-Preserving},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   K.6.5 Security and Protection},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {As technology advances in new sensors and software, modern vehicles become
      increasingly intelligent. To date, connected vehicles can collect, process, and
      share data with other entities in connected vehicle environments. However, in
      terms of data collection and exchange, privacy becomes a central issue. It is
      challenging to preserve privacy in connected vehicle environments when the
      privacy demands of drivers could change from situation to situation even for
      the same service. In this paper, we analyze the requirements for a
      privacy-preserving system in connected vehicle environments with a focus on
      situation-awareness and safety aspects. Based on the analysis, we propose a
      novel situation-aware privacy-preserving framework for connected vehicles. Our
      framework supports individual privacy protections for specific end-point
      services and situation-aware privacy protections for different circumstances.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-07&engl=1}
}

% Conference paper at IRI 2022 (IEEE); no page range or DOI recorded in this entry.
@inproceedings {INPROC-2022-06,
   author = {Julian Ziegler and Peter Reimann and Christoph Schulz and Florian Keller and Bernhard Mitschang},
   title = {{A Graph Structure to Discover Patterns in Unstructured Processes of Product Development}},
   booktitle = {Proceedings of the 23rd International Conference on Information Reuse and Integration for Data Science (IRI 2022)},
   publisher = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = {August},
   year = {2022},
   keywords = {Process Discovery; Unstructured Processes; Process Patterns; Graph Data; Frequent Subgraph Mining},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {A well-known information reuse approach is to exploit event logs for process
      discovery and optimization. However, process discovery is rarely used for
      product development projects. This is because information systems in product
      development, e. g., Product-Lifecycle-Management (PLM) systems, do not provide
      the event logs required by process discovery algorithms. Additionally, existing
      algorithms struggle with development projects, as these are unstructured and
      rich in variety. In this paper, we propose a novel approach to process
      discovery in order to make it applicable and tailored to product development
      projects. Instead of using flat event logs, we provide a graph-based data
      structure that is able to represent both activities and data of product
      development projects with the dataflow between activities. Based on this
      structure, we can leverage provenance available in PLM systems. Furthermore, we
      may use frequent subgraph mining to discover process patterns. Such patterns
      are well suited to describe different variants and common sub-processes of
      unstructured processes. Using a prototype, we evaluate this approach and
      successfully discover prevailing patterns. These patterns may be used by
      engineers to support their decision-making or help improve the execution of
      development projects.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-06&engl=1}
}

% Conference paper at the 34th International Conference on Intelligent Information Systems (Springer), pp. 19--26, 2022.
% NOTE(review): the series name and volume of the proceedings are not recorded here -- add them once confirmed.
@inproceedings {INPROC-2022-05,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Holger Schwarz and Bernhard Mitschang},
   title = {{Data Shopping --- How an Enterprise Data Marketplace Supports Data Democratization in Companies}},
   booktitle = {Proceedings of the 34th International Conference on Intelligent Information Systems},
   editor = {Jochen De Weerdt and Artem Polyvyanyy},
   address = {Stuttgart},
   publisher = {Springer International Publishing},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {19--26},
   type = {Conference Paper},
   month = {May},
   year = {2022},
   isbn = {978-3-031-07481-3},
   doi = {10.1007/978-3-031-07481-3_3},
   keywords = {Data Marketplace; Data Sharing; Data Democratization},
   language = {English},
   cr-category = {H.0 Information Systems General},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {To exploit the company's data value, employees must be able to find, understand
      and access it. The process of making corporate data available to the majority
      of the company's employees is referred to as data democratization. In this
      work, we present the current state and challenges of data democratization in
      companies, derived from a comprehensive literature study and expert interviews
      we conducted with a manufacturer. In this context a data consumer's journey is
      presented that reflects the required steps, tool types and roles for finding,
      understanding and accessing data in addition to revealing three data
      democratization challenges. To address these challenges we propose the use of
      an enterprise data marketplace, a novel type of information system for sharing
      data within the company. We developed a prototype based on which a suitability
      assessment of a data marketplace yields an improved consumer journey and
      demonstrates that the marketplace addresses the data democratization challenges
      and consequently, shows that the marketplace is suited for realizing data
      democratization.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-05&engl=1}
}

% Conference paper at ICEIS 2022 (SciTePress); no page range or DOI recorded in this entry.
% NOTE(review): the verb in the abstract's "which may imply a concept shift" was missing in the export -- confirm against the published abstract.
@inproceedings {INPROC-2022-03,
   author = {Marco Spie{\ss} and Peter Reimann and Christian Weber and Bernhard Mitschang},
   title = {{Analysis of Incremental Learning and Windowing to handle Combined Dataset Shifts on Binary Classification for Product Failure Prediction}},
   booktitle = {Proceedings of the 24th International Conference on Enterprise Information Systems (ICEIS 2022)},
   publisher = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = {April},
   year = {2022},
   keywords = {Binary Classification; Dataset Shift; Incremental Learning; Product Failure Prediction; Windowing},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Dataset Shifts (DSS) are known to cause poor predictive performance in
      supervised machine learning tasks. We present a challenging binary
      classification task for a real-world use case of product failure prediction.
      The target is to predict whether a product, e. g., a truck may fail during the
      warranty period. However, building a satisfactory classifier is difficult,
      because the characteristics of underlying training data entail two kinds of
      DSS. First, the distribution of product configurations may change over time,
      leading to a covariate shift. Second, products gradually fail at different
      points in time, so that the labels in training data may change, which may imply a
      concept shift. Further, both DSS show a trade-off relationship, i. e.,
      addressing one of them may imply negative impacts on the other one. We discuss
      the results of an experimental study to investigate how different approaches to
      addressing DSS perform when they are faced with both a covariate and a concept
      shift. Thereby, we prove that existing approaches, e. g., incremental learning
      and windowing, especially suffer from the trade-off between both DSS.
      Nevertheless, we come up with a solution for a data-driven classifier that
      yields better results than a baseline solution that does not address DSS.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-03&engl=1}
}

% Conference paper at CIRP CMS 2022 (Procedia CIRP, Elsevier); no page range or DOI recorded in this entry.
@inproceedings {INPROC-2022-02,
   author = {Florian Hermann and Bowen Chen and Golsa Ghasemi and Valentin Stegmaier and Thomas Ackermann and Peter Reimann and Sabrina Vogt and Thomas Graf and Michael Weyrich},
   title = {{A Digital Twin Approach for the Prediction of the Geometry of Single Tracks Produced by Laser Metal Deposition}},
   booktitle = {Procedia CIRP: Proceedings of the 55th CIRP Conference on Manufacturing Systems (CIRP CMS 2022)},
   publisher = {Elsevier BV},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = {June},
   year = {2022},
   keywords = {Laser metal deposition; Software-defined manufacturing; Digital Twin; Asset Administration Shell},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Flexible manufacturing processes such as laser metal deposition have a high
      potential for a production solely defined by software to cope with the current
      challenges of production systems. The determination of suitable machine
      parameters for the production of novel materials and geometries however
      requires extensive experimental effort. Existing simulative approaches do not
      offer sufficient accuracy to predict the relevant machine parameters in a
      satisfactory way. This paper presents a new concept, in which we apply a
      digital twin to provide a step towards a fully software-defined and predictable
      laser metal deposition process. The presented concept includes relevant data of
      the machines as well as data-driven machine learning models and physics-based
      simulation models. This enables a more reliable prediction of geometries of
      single tracks which was validated on a laser metal deposition machine.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-02&engl=1}
}

% Conference paper at the 37th ACM/SIGAPP Symposium On Applied Computing (ACM), pp. 1--10, 2022.
@inproceedings {INPROC-2022-01,
   author = {Christoph Stach and Cl{\'e}mentine Gritti and Dennis Przytarski and Bernhard Mitschang},
   title = {{Can Blockchains and Data Privacy Laws be Reconciled? A Fundamental Study of How Privacy-Aware Blockchains are Feasible}},
   booktitle = {Proceedings of the 37th ACM/SIGAPP Symposium On Applied Computing},
   editor = {Jiman Hong and Miroslav Bures and Ronald Petrlic and Christoph Sorge},
   address = {Brno},
   publisher = {ACM},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--10},
   type = {Conference Paper},
   month = {April},
   year = {2022},
   isbn = {978-1-4503-8713-2},
   doi = {10.1145/3477314.3506986},
   keywords = {blockchains; immutable; tamper-proof; GDPR; privacy assessment},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   K.6.5 Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Due to the advancing digitalization, the importance of data is constantly
      increasing. Application domains such as smart cars, smart cities, or smart
      healthcare rely on the permanent availability of large amounts of data to all
      parties involved. As a result, the value of data increases, making it a
      lucrative target for cyber-attacks. Particularly when human lives depend on the
      data, additional protection measures are therefore important for data
      management and provision. Blockchains, i.e., decentralized, immutable, and
      tamper-proof data stores, are becoming increasingly popular for this purpose.
      Yet, from a data protection perspective, the immutable and tamper-proof
      properties of blockchains pose a privacy concern. In this paper, we therefore
      investigate whether blockchains are in compliance with the General Data
      Protection Regulation (GDPR) if personal data are involved. To this end, we
      elaborate which articles of the GDPR are relevant in this regard and present
      technical solutions for those legal requirements with which blockchains are in
      conflict. We further identify open research questions that need to be addressed
      in order to achieve a privacy-by-design blockchain system.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2022-01&engl=1}
}

% Journal editorial in Future Internet 14(11), MDPI, 2022.
@article {ART-2022-09,
   author = {Christoph Stach},
   title = {{Editorial to the Special Issue on Security and Privacy in Blockchains and the IoT}},
   journal = {Future Internet},
   publisher = {MDPI},
   volume = {14},
   number = {11},
   pages = {1--4},
   type = {Article in Journal},
   month = {November},
   year = {2022},
   issn = {1999-5903},
   doi = {10.3390/fi14110317},
   language = {English},
   cr-category = {D.4.6 Operating Systems Security and Protection,
                   K.4.1 Computers and Society Public Policy Issues,
                   K.6.5 Security and Protection},
   ee = {https://www.mdpi.com/1999-5903/14/11/317/htm},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {In this day and age, data have become an immensely valuable resource. They are
      the key driver that puts the smart into smart services. This is fundamentally
      fueled by two technological achievements, namely the Internet of Things (IoT),
      which enables continuous and comprehensive collection of all kinds of data, and
      blockchain technologies, which provide secure data management and exchange. In
      addition to those information security measures, however, data privacy
      solutions are also required to protect the involved sensitive data. In this
      Special Issue, eight research papers address security and privacy challenges
      when dealing with blockchain technologies and the IoT. Concerning the IoT,
      solutions are presented on how IoT group communication can be secured and how
      trust within IoT applications can be increased. In the context of blockchain
      technologies, approaches are introduced on how the capabilities of query
      processing can be enhanced and how a proof-of-work consensus protocol can be
      efficiently applied in IoT environments. Furthermore, it is discussed how
      blockchain technologies can be used in IoT environments to control access to
      confidential IoT data as well as to enable privacy-aware data sharing. Finally,
      two reviews give an overview of the state of the art in in-app activity
      recognition based on convolutional neural networks and the prospects for
      blockchain technology applications in ambient assisted living.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-09&engl=1}
}

% Journal article in ACM SIGAPP Applied Computing Review 22(3), pp. 5--24, 2022.
@article {ART-2022-08,
   author = {Christoph Stach and Cl{\'e}mentine Gritti and Dennis Przytarski and Bernhard Mitschang},
   title = {{Assessment and Treatment of Privacy Issues in Blockchain Systems}},
   journal = {ACM SIGAPP Applied Computing Review},
   publisher = {ACM},
   volume = {22},
   number = {3},
   pages = {5--24},
   type = {Article in Journal},
   month = {September},
   year = {2022},
   issn = {1559-6915},
   keywords = {blockchain; decentralized; immutable; tamper-proof; GDPR; privacy assessment; data purging; data authentication; permission control; privacy filters; privacy control environment},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   K.6.5 Security and Protection},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The ability to capture and quantify any aspect of daily life via sensors,
      enabled by the Internet of Things (IoT), data have become one of the most
      important resources of the 21st century. However, the high value of data also
      renders data an appealing target for criminals. Two key protection goals when
      dealing with data are therefore to maintain their permanent availability and to
      ensure their integrity. Blockchain technology provides a means of data
      protection that addresses both of these objectives. On that account,
      blockchains are becoming increasingly popular for the management of critical
      data. As blockchains are operated in a decentralized manner, they are not only
      protected against failures, but it is also ensured that neither party has sole
      control over the managed data. Furthermore, blockchains are immutable and
      tamper-proof data stores, whereby data integrity is guaranteed. While these
      properties are preferable from a data security perspective, they also pose a
      threat to privacy and confidentiality, as data cannot be concealed, rectified,
      or deleted once they are added to the blockchain.

      In this paper, we therefore investigate which features of the blockchain pose
      an inherent privacy threat when dealing with personal or confidential data. To
      this end, we consider to what extent blockchains are in compliance with
      applicable data protection laws, namely the European General Data Protection
      Regulation (GDPR). Based on our identified key issues, we assess which concepts
      and technical measures can be leveraged to address these issues in order to
      create a privacy-by-design blockchain system.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-08&engl=1}
}

% Journal article in Future Internet 14(11), MDPI, pp. 1--42, 2022.
@article {ART-2022-07,
   author = {Christoph Stach and Cl{\'e}mentine Gritti and Julia Br{\"a}cker and Michael Behringer and Bernhard Mitschang},
   title = {{Protecting Sensitive Data in the Information Age: State of the Art and Future Prospects}},
   journal = {Future Internet},
   publisher = {MDPI},
   volume = {14},
   number = {11},
   pages = {1--42},
   type = {Article in Journal},
   month = {October},
   year = {2022},
   issn = {1999-5903},
   doi = {10.3390/fi14110302},
   keywords = {smart service; privacy techniques; location-based services; health services; voice-controlled digital assistants; image analysis; food analysis; recommender systems; DNA sequence classification},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   K.6.5 Security and Protection},
   ee = {https://www.mdpi.com/1999-5903/14/11/302/htm},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The present information age is characterized by an ever-increasing
      digitalization. Smart devices quantify our entire lives. These collected data
      provide the foundation for data-driven services called smart services. They are
      able to adapt to a given context and thus tailor their functionalities to the
      user's needs. It is therefore not surprising that their main resource, namely
      data, is nowadays a valuable commodity that can also be traded. However, this
      trend does not only have positive sides, as the gathered data reveal a lot of
      information about various data subjects. To prevent uncontrolled insights into
      private or confidential matters, data protection laws restrict the processing
      of sensitive data. One key factor in this regard is user-friendly privacy
      mechanisms. In this paper, we therefore assess current state-of-the-art privacy
      mechanisms. To this end, we initially identify forms of data processing applied
      by smart services. We then discuss privacy mechanisms suited for these use
      cases. Our findings reveal that current state-of-the-art privacy mechanisms
      provide good protection in principle, but there is no compelling
      one-size-fits-all privacy approach. This leads to further questions regarding
      the practicality of these mechanisms, which we present in the form of seven
      thought-provoking propositions.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-07&engl=1}
}

% Journal article in Journal of Cybersecurity and Privacy 2(3), MDPI, pp. 606--628, 2022.
@article {ART-2022-06,
   author = {Christoph Stach and Michael Behringer and Julia Br{\"a}cker and Cl{\'e}mentine Gritti and Bernhard Mitschang},
   title = {{SMARTEN --- A Sample-Based Approach towards Privacy-Friendly Data Refinement}},
   journal = {Journal of Cybersecurity and Privacy},
   publisher = {MDPI},
   volume = {2},
   number = {3},
   pages = {606--628},
   type = {Article in Journal},
   month = {August},
   year = {2022},
   issn = {2624-800X},
   doi = {10.3390/jcp2030031},
   keywords = {privacy; data refinement; data cleansing; data transformation; human-in-the-loop},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   K.6.5 Security and Protection},
   ee = {https://www.mdpi.com/2624-800X/2/3/31/htm},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Two factors are crucial for the effective operation of modern-day smart
      services: Initially, IoT-enabled technologies have to capture and combine huge
      amounts of data on data subjects. Then, all these data have to be processed
      exhaustively by means of techniques from the area of big data analytics. With
      regard to the latter, thorough data refinement in terms of data cleansing and
      data transformation is the decisive cornerstone. Studies show that data
      refinement reaches its full potential only by involving domain experts in the
      process. However, this means that these experts need full insight into the data
      in order to be able to identify and resolve any issues therein, e.g., by
      correcting or removing inaccurate, incorrect, or irrelevant data records. In
      particular for sensitive data (e.g., private data or confidential data), this
      poses a problem, since these data are thereby disclosed to third parties such
      as domain experts. To this end, we introduce SMARTEN, a sample-based approach
      towards privacy-friendly data refinement to smarten up big data analytics and
      smart services. SMARTEN applies a revised data refinement process that fully
      involves domain experts in data pre-processing but does not expose any
      sensitive data to them or any other third-party. To achieve this, domain
      experts obtain a representative sample of the entire data set that meets all
      privacy policies and confidentiality guidelines. Based on this sample, domain
      experts define data cleaning and transformation steps. Subsequently, these
      steps are converted into executable data refinement rules and applied to the
      entire data set. Domain experts can request further samples and define further
      rules until the data quality required for the intended use case is reached.
      Evaluation results confirm that our approach is effective in terms of both data
      quality and data privacy.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-06&engl=1}
}

% Journal article in Journal of Data Intelligence 3(3), Rinton Press, pp. 366--400, 2022.
@article {ART-2022-01,
   author = {Christoph Stach and Julia Br{\"a}cker and Rebecca Eichler and Corinna Giebler and Bernhard Mitschang},
   title = {{Simplified Specification of Data Requirements for Demand-Actuated Big Data Refinement}},
   journal = {Journal of Data Intelligence},
   publisher = {Rinton Press},
   volume = {3},
   number = {3},
   pages = {366--400},
   type = {Article in Journal},
   month = {August},
   year = {2022},
   issn = {2577-610X},
   keywords = {data pre-processing; data transformation; knowledge modeling; ontology; data management; Data Lakes; zone model; food analysis},
   language = {English},
   cr-category = {H.2.7 Database Administration,
                   E.2 Data Storage Representations,
                   H.3.3 Information Search and Retrieval,
                   H.2.8 Database Applications},
   contact = {Senden Sie eine E-Mail an christoph.stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Data have become one of the most valuable resources in modern society. Due to
      increasing digitalization and the growing prevalence of the Internet of Things,
      it is possible to capture data on any aspect of today's life. Similar to
      physical resources, data have to be refined before they can become a profitable
      asset. However, such data preparation entails completely novel challenges: For
      instance, data are not consumed when being processed, whereby the volume of
      available data that needs to be managed increases steadily. Furthermore, the
      data preparation has to be tailored to the intended use case in order to
      achieve an optimal outcome. This, however, requires the knowledge of domain
      experts. Since such experts are typically not IT experts, they need tools that
      enable them to specify the data requirements of their use cases in a
      user-friendly manner. The goal of this data preparation is to provide any
      emerging use case with demand-actuated data.

      With this in mind, we designed a tailorable data preparation zone for Data
      Lakes called BARENTS. It provides a simplified method for domain experts to
      specify how data must be pre-processed for their use cases, and these data
      preparation steps are then applied automatically. The data requirements are
      specified by means of an ontology-based method which is comprehensible to
      non-IT experts. Data preparation and provisioning are realized
      resource-efficient by implementing BARENTS as a dedicated zone for Data Lakes.
      This way, BARENTS is seamlessly embeddable into established Big Data
      infrastructures.

      This article is an extended and revised version of the conference paper
      ``Demand-Driven Data Provisioning in Data Lakes: BARENTS - A Tailorable Data
      Preparation Zone'' by Stach et al. In comparison to our original conference
      paper, we take a more detailed look at related work in the paper at hand. The
      emphasis of this extended and revised version, however, is on strategies to
      improve the performance of BARENTS and enhance its functionality. To this end,
      we discuss in-depth implementation details of our prototype and introduce a
      novel recommender system in BARENTS that assists users in specifying data
      preparation steps.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2022-01&engl=1}
}

