% Workshop paper (in German) presenting an AutoML approach for creating and optimizing classifier ensembles.
@inproceedings {INPROC-2023-12,
   author = {Julius Voggesberger},
   title = {{Optimierung von Klassifikator-Ensembles mit AutoML}},
   booktitle = {Proceedings of the 34th Workshop on Grundlagen von Datenbanken},
   publisher = {CEUR-WS.org},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Workshop Paper},
   month = jun,
   year = {2023},
   keywords = {Maschinelles Lernen; Klassifikator-Ensembles; Klassifikatordiversit{\"a}t; Entscheidungsfusion},
   language = {German},
   cr-category = {I.2 Artificial Intelligence},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Daten f{\"u}r Klassifikationsprobleme weisen oft komplexe Charakteristika auf, die
      zu ungenauen Vorhersagen der mit den Daten trainierten Klassifikatoren f{\"u}hren.
      Beispielsweise kann eine geringe Menge an Daten zu einer {\"U}beranpassung der
      Klassifikatoren f{\"u}hren. Um derartige Probleme zu l{\"o}sen, k{\"o}nnen mehrere
      Klassifikatoren zu einem Ensemble kombiniert werden. Hierf{\"u}r m{\"u}ssen mehrere
      Klassifikatoren trainiert werden, die m{\"o}glichst genau, aber auch divers sind.
      Diversit{\"a}t bedeutet in diesem Fall, dass die Klassifikatoren auf
      unterschiedlichen Dateninstanzen korrekte Vorhersagen treffen. Weiterhin muss
      eine geeignete Methode f{\"u}r die Fusion der einzelnen Klassifikatorvorhersagen
      ausgew{\"a}hlt werden. In dieser Arbeit stellen wir einen AutoML-Ansatz vor, mit
      dem die Erstellung und Optimierung eines Ensembles automatisiert m{\"o}glich ist.
      Der Ansatz wird anhand zweier Echtweltdatens{\"a}tze mit komplexen
      Datencharakteristika evaluiert. Die Ergebnisse der Evaluation zeigen hierbei
      eine Verbesserung der Vorhersagegenauigkeit durch die automatisch erstellten
      Ensembles.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-12&engl=1}
}

% Conference paper on privacy challenges in connected vehicles from the perspectives of drivers and car manufacturers.
@inproceedings {INPROC-2023-07,
   author = {Andrea Fieschi and Yunxuan Li and Pascal Hirmer and Christoph Stach and Bernhard Mitschang},
   title = {{Privacy in Connected Vehicles: Perspectives of Drivers and Car Manufacturers}},
   booktitle = {Service-Oriented Computing: 17th Symposium and Summer School, SummerSOC 2023, Heraklion, Crete, Greece, June 25 -- July 1, 2023, Revised Selected Papers},
   editor = {Marco Aiello and Johanna Barzen and Schahram Dustdar and Frank Leymann},
   address = {Cham},
   publisher = {Springer Nature Switzerland},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Communications in Computer and Information Science},
   volume = {1847},
   pages = {59--68},
   type = {Conference Paper},
   month = oct,
   year = {2023},
   isbn = {978-3-031-45727-2},
   doi = {10.1007/978-3-031-45728-9_4},
   keywords = {Connected Vehicles; Privacy; Anonymization},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues},
   contact = {Senden Sie eine E-Mail an Andrea Fieschi (Andrea.Fieschi@ipvs.uni-stuttgart.de) oder Yunxuan Li (Yunxuan.Li@ipvs.uni-stuttgart.de).},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The digital revolution has led to significant technological advancements in the
      automotive industry, enabling vehicles to process and share information with
      other vehicles and the cloud. However, as data sharing becomes more prevalent,
      privacy protection has become an essential issue. In this paper, we explore
      various privacy challenges regarding different perspectives of drivers and car
      manufacturers. We also propose general approaches to overcome these challenges
      with respect to their individual needs. Finally, we highlight the importance of
      collaboration between drivers and car manufacturers to establish trust and
      achieve better privacy protection.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-07&engl=1}
}

% Conference paper assessing definitions of lakehouses and deriving technical requirements for them.
% NOTE(review): address is set to Prague, which the booktitle names as the conference venue;
% confirm whether the publisher location was intended instead.
@inproceedings {INPROC-2023-06,
   author = {Jan Schneider and Christoph Gr{\"o}ger and Arnold Lutsch and Holger Schwarz and Bernhard Mitschang},
   title = {{Assessing the Lakehouse: Analysis, Requirements and Definition}},
   booktitle = {Proceedings of the 25th International Conference on Enterprise Information Systems, ICEIS 2023, Volume 1, Prague, Czech Republic, April 24--26, 2023},
   editor = {Joaquim Filipe and Michal Smialek and Alexander Brodsky and Slimane Hammoudi},
   address = {Prague},
   publisher = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {44--56},
   type = {Conference Paper},
   month = may,
   year = {2023},
   isbn = {978-989-758-648-4},
   issn = {2184-4992},
   doi = {10.5220/0011840500003467},
   keywords = {Lakehouse; Data Warehouse; Data Lake; Data Management; Data Analytics},
   language = {English},
   cr-category = {H.2.4 Database Management Systems,
                   H.2.7 Database Administration,
                   H.2.8 Database Applications},
   ee = {https://www.scitepress.org/PublicationsDetail.aspx?ID=9ydI3Lyl2Fk=,
      https://doi.org/10.5220/0011840500003467},
   contact = {jan.schneider@ipvs.uni-stuttgart.de},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The digital transformation opens new opportunities for enterprises to optimize
      their business processes by applying data-driven analysis techniques. For
      storing and organizing the required huge amounts of data, different types of
      data platforms have been employed in the past, with data warehouses and data
      lakes being the most prominent ones. Since they possess rather contrary
      characteristics and address different types of analytics, companies typically
      utilize both of them, leading to complex architectures with replicated data and
      slow analytical processes. To counter these issues, vendors have recently been
      making efforts to break the boundaries and to combine features of both worlds
      into integrated data platforms. Such systems are commonly called lakehouses and
      promise to simplify enterprise analytics architectures by serving all kinds of
      analytical workloads from a single platform. However, it remains unclear how
      lakehouses can be characterized, since existing definitions focus almost
      arbitrarily on individual architectural or functional aspects and are often
      driven by marketing. In this paper, we assess prevalent definitions for
      lakehouses and finally propose a new definition, from which several technical
      requirements for lakehouses are derived. We apply these requirements to several
      popular data management tools, such as Delta Lake, Snowflake and Dremio in
      order to evaluate whether they enable the construction of lakehouses.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-06&engl=1}
}

% Conference paper presenting a qualification concept for early stage researchers in biointelligent production systems.
@inproceedings {INPROC-2023-05,
   author = {Thomas Ackermann and Robert Miehe and Peter Reimann and Bernhard Mitschang and Ralf Takors and Thomas Bauernhansl},
   title = {{A Cross-Disciplinary Training Concept for Future Technologists in the Dawn of Biointelligent Production Systems}},
   booktitle = {Procedia CIRP: Proceedings of 13th CIRP Conference on Learning Factories (CIRP CLF)},
   publisher = {Elsevier BV},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = may,
   year = {2023},
   keywords = {Biointelligent systems; Biological transformation; Converging technologies; Qualification},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Biologicalization is considered one of the most important transformation
      processes in industry alongside digitalization. This work presents a
      qualification concept within the Stuttgart Biointelligent Manufacturing
      Framework (BioMEFUS), which is intended to provide skills and experiences at
      the intersections between manufacturing and process engineering, computer
      science and life science. Life cycle management, production methods and
      engineering of components towards the development and implementation of
      biointelligent systems are considered as the major engineering platforms of the
      framework. The qualification concept is developed for early stage researchers
      (ESRs) at the doctorate stage. It provides a mapping of individual research
      projects in the field of biointelligent production systems and contains
      subject-related and methodological building blocks for the formation of future
      experts and decision-makers in the course of biological transformation.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-05&engl=1}
}

% Position paper proposing to automate the creation of optimized classifier ensembles.
@inproceedings {INPROC-2023-04,
   author = {Julius Voggesberger and Peter Reimann and Bernhard Mitschang},
   title = {{Towards the Automatic Creation of Optimized Classifier Ensembles}},
   booktitle = {Proceedings of the 25th International Conference on Enterprise Information Systems (ICEIS 2023)},
   publisher = {SciTePress},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {614--621},
   type = {Conference Paper},
   month = apr,
   year = {2023},
   keywords = {Classifier Ensembles; Classifier Diversity; Decision Fusion; AutoML; Machine Learning},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Classifier ensemble algorithms allow for the creation of combined machine
      learning models that are more accurate and generalizable than individual
      classifiers. However, creating such an ensemble is complex, as several
      requirements must be fulfilled. An expert has to select multiple classifiers
      that are both accurate and diverse. In addition, a decision fusion algorithm
      must be selected to combine the predictions of these classifiers into a
      consensus decision. Satisfying these requirements is challenging even for
      experts, as it requires a lot of time and knowledge. In this position paper, we
      propose to automate the creation of classifier ensembles. While there already
      exist several frameworks that automatically create multiple classifiers, none
      of them meet all requirements to build optimized ensembles based on these
      individual classifiers. Hence, we introduce and compare three basic approaches
      that tackle this challenge. Based on the comparison results, we propose one of
      the approaches that best meets the requirements to lay the foundation for
      future work.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-04&engl=1}
}

% Conference paper introducing PUSION (Python Universal fuSION), a framework for decision fusion of classifiers.
@inproceedings {INPROC-2023-03,
   author = {Yannick Wilhelm and Peter Reimann and Wolfgang Gauchel and Steffen Klein and Bernhard Mitschang},
   title = {{PUSION -- A Generic and Automated Framework for Decision Fusion}},
   booktitle = {Proceedings of the 39th IEEE International Conference on Data Engineering (ICDE 2023)},
   publisher = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   type = {Conference Paper},
   month = apr,
   year = {2023},
   keywords = {Classifier ensembles; decision fusion; automated decision fusion; hybrid fault diagnosis},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Combining two or more classifiers into an ensemble and fusing the individual
      classifier decisions to a consensus decision can improve the accuracy for a
      classification problem. The classification improvement of the fusion result
      depends on numerous factors, such as the data set, the combination scenario,
      the decision fusion algorithm, as well as the prediction accuracies and
      diversity of the multiple classifiers to be combined. Due to these factors, the
      best decision fusion algorithm for a given decision fusion problem cannot be
      generally determined in advance. In order to support the user in combining
      classifiers and to achieve the best possible fusion result, we propose the
      PUSION (Python Universal fuSION) framework, a novel generic and automated
      framework for decision fusion of classifiers. The framework includes 14
      decision fusion algorithms and covers a total of eight different combination
      scenarios for both multi-class and multi-label classification problems. The
      introduced concept of AutoFusion detects the combination scenario for a given
      use case, automatically selects the applicable decision fusion algorithms and
      returns the decision fusion algorithm that leads to the best fusion result. The
      framework is evaluated with two real-world case studies in the field of fault
      diagnosis. In both case studies, the consensus decision of multiple classifiers
      and heterogeneous fault diagnosis methods significantly increased the overall
      classification accuracy. Our evaluation results show that our framework is of
      practical relevance and reliably finds the best performing decision fusion
      algorithm for a given combination task.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-03&engl=1}
}

% Conference paper on synthetically generating data that exhibit multi-class imbalance and heterogeneous groups.
% NOTE(review): booktitle edition label is given as BTW 2023 to agree with the year field and the
% 20th edition; verify against the published proceedings.
@inproceedings {INPROC-2023-02,
   author = {Dennis Treder-Tschechlov and Peter Reimann and Holger Schwarz and Bernhard Mitschang},
   title = {{Approach to Synthetic Data Generation for Imbalanced Multi-class Problems with Heterogeneous Groups}},
   booktitle = {Tagungsband der 20. Fachtagung Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2023)},
   publisher = {Gesellschaft f{\"u}r Informatik e.V. (GI)},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   series = {Lecture Notes in Informatics (LNI)},
   pages = {329--351},
   type = {Conference Paper},
   month = mar,
   year = {2023},
   keywords = {Machine learning; classification; data generation; real-world data characteristics},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   ee = {https://dl.gi.de/bitstream/handle/20.500.12116/40320/B3-5.pdf},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {To benchmark novel classification algorithms, these algorithms should be
      evaluated on data with characteristics that also appear in real-world use
      cases. Important data characteristics that often lead to challenges for
      classification approaches are multi-class imbalance and heterogeneous groups.
      Heterogeneous groups are sets of real-world entities, where the classification
      patterns may vary among different groups and where the groups are typically
      imbalanced in the data. Real-world data that comprise these characteristics are
      usually not publicly available, e.g., because they constitute sensitive patient
      information or due to privacy concerns. Further, the manifestations of the
      characteristics cannot be controlled specifically on real-world data. A more
      rigorous approach is to synthetically generate data such that different
      manifestations of the characteristics can be controlled as well. However,
      existing data generators are not able to generate data that feature both data
      characteristics, i.e., multi-class imbalance and heterogeneous groups. In this
      paper, we propose an approach that fills this gap as it allows to synthetically
      generate data that exhibit both characteristics. We make use of a taxonomy
      model that organizes real-world entities in domain-specific heterogeneous
      groups to generate data reflecting the characteristics of these groups.
      Further, we incorporate probability distributions to reflect the imbalances of
      multiple classes and groups from real-world use cases. The evaluation shows
      that our approach can generate data that feature the data characteristics
      multi-class imbalance and heterogeneous groups and that it allows to control
      different manifestations of these characteristics.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-02&engl=1}
}

% Conference paper proposing CV-Priv, an ontology-based privacy context model for connected vehicles.
@inproceedings {INPROC-2023-01,
   author = {Yunxuan Li and Pascal Hirmer and Christoph Stach},
   title = {{CV-Priv: Towards a Context Model for Privacy Policy Creation for Connected Vehicles}},
   booktitle = {Proceedings of the 21st International Conference on Pervasive Computing and Communications Workshops},
   publisher = {IEEE},
   institution = {University of Stuttgart, Faculty of Computer Science, Electrical Engineering, and Information Technology, Germany},
   pages = {1--6},
   type = {Conference Paper},
   month = mar,
   year = {2023},
   keywords = {Context Modeling; Ontology; Privacy Policy; Privacy-Preserving; Connected Vehicle},
   language = {English},
   cr-category = {K.4.1 Computers and Society Public Policy Issues,
                   K.6.5 Security and Protection},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Connected vehicles are becoming progressively capable of collecting,
      processing, and sharing data, which leads to a growing concern about privacy in
      the automotive domain. However, research has shown that although users are
      highly concerned about their privacy, they usually find it difficult to
      configure privacy settings. This is because the privacy context, which
      represents the privacy circumstance a driver faces during the privacy policy
      creation, is highly complex. To create custom privacy policies, drivers must
      consider the privacy context information, such as what service is requesting
      data from which vehicle sensor, or what privacy countermeasures are available
      for vehicles and satisfy certain privacy properties. This easily leads to
      information and choice overhead. Therefore, we propose the novel ontology-based
      privacy context model, CV-Priv, for the modeling of such privacy context
      information for creating custom privacy policies in the automotive domain. In
      this paper, we analyze the design requirements for a privacy context model
      based on challenges drivers might face during the privacy policy creation
      phase. We also demonstrate how CV-Priv can be utilized by context-aware systems
      to help drivers transform their fuzzy privacy requirements into sound privacy
      policies.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=INPROC-2023-01&engl=1}
}

% Journal article presenting the Enterprise Data Marketplace, a platform to democratize data within a company.
@article {ART-2023-07,
   author = {Rebecca Eichler and Christoph Gr{\"o}ger and Eva Hoos and Christoph Stach and Holger Schwarz and Bernhard Mitschang},
   title = {{Introducing the enterprise data marketplace: a platform for democratizing company data}},
   journal = {Journal of Big Data},
   publisher = {Springer Nature},
   volume = {10},
   pages = {1--38},
   type = {Article in Journal},
   month = nov,
   year = {2023},
   issn = {2196-1115},
   doi = {10.1186/s40537-023-00843-z},
   keywords = {Data Catalog; Data Democratization; Data Market; Data Sharing; Enterprise Data Marketplace; Metadata Management},
   language = {English},
   cr-category = {E.m Data Miscellaneous,
                   H.3.7 Digital Libraries,
                   H.4.m Information Systems Applications Miscellaneous},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {In this big data era, multitudes of data are generated and collected which
      contain the potential to gain new insights, e.g., for enhancing business
      models. To leverage this potential through, e.g., data science and analytics
      projects, the data must be made available. In this context, data marketplaces
      are used as platforms to facilitate the exchange and thus, the provisioning of
      data and data-related services. Data marketplaces are mainly studied for the
      exchange of data between organizations, i.e., as external data marketplaces.
      Yet, the data collected within a company also has the potential to provide
      valuable insights for this same company, for instance to optimize business
      processes. Studies indicate, however, that a significant amount of data within
      companies remains unused. In this sense, it is proposed to employ an Enterprise
      Data Marketplace, a platform to democratize data within a company among its
      employees. Specifics of the Enterprise Data Marketplace, how it can be
      implemented or how it makes data available throughout a variety of systems like
      data lakes has not been investigated in literature so far. Therefore, we
      present the characteristics and requirements of this kind of marketplace. We
      also distinguish it from other tools like data catalogs, provide a platform
      architecture and highlight how it integrates with the company's system
      landscape. The presented concepts are demonstrated through an Enterprise Data
      Marketplace prototype and an experiment reveals that this marketplace
      significantly improves the data consumer workflows in terms of efficiency and
      complexity. This paper is based on several interdisciplinary works combining
      comprehensive research with practical experience from an industrial
      perspective. We therefore present the Enterprise Data Marketplace as a distinct
      marketplace type and provide the basis for establishing it within a company.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-07&engl=1}
}

% Editorial to a journal special issue on security and privacy in blockchains and the IoT.
@article {ART-2023-06,
   author = {Christoph Stach and Cl{\'e}mentine Gritti},
   title = {{Editorial to the Special Issue on Security and Privacy in Blockchains and the IoT Volume II}},
   journal = {Future Internet},
   address = {Basel, Schweiz},
   publisher = {MDPI},
   volume = {15},
   number = {8},
   pages = {1--7},
   type = {Article in Journal},
   month = aug,
   year = {2023},
   issn = {1999-5903},
   doi = {10.3390/fi15080272},
   language = {English},
   cr-category = {D.4.6 Operating Systems Security and Protection,
                   K.4.1 Computers and Society Public Policy Issues,
                   K.6.5 Security and Protection},
   ee = {https://www.mdpi.com/1999-5903/15/8/272/htm},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {In this age of data-driven transformation, where the fusion of blockchain
      technologies and the Internet of Things (IoT) is shaping the fabric of our
      digital society, the need for security and privacy has never been more
      important. This Special Issue delves into the intricate confluence of these two
      disruptive forces and provides a comprehensive overview of security and privacy
      aspects in this regard. Focusing on protection goals such as confidentiality,
      integrity, availability, and privacy, this compilation encapsulates the essence
      of these multi-layered challenges. Ranging from complex data-driven
      applications and smart services to novel approaches that enhance security and
      privacy in the context of blockchain technologies and the IoT, the research
      articles and literature reviews presented here offer a sophisticated mesh of
      insights. Innovative solutions are highlighted from a variety of perspectives,
      and challenges such as secure data transmission, confidential communication,
      and tamper-proof data storage are explored.

      In this way, this Special Issue is a beacon for practitioners, researchers, and
      technology enthusiasts. Developers seeking to harness the potential of
      blockchain technology and IoT find rich insights while users get a
      comprehensive overview of the latest research and trends. The symphony of
      interdisciplinary knowledge presented here creates a harmonious blend of theory
      and practice, highlighting the intricate interdependencies between
      technological advances and the need for security and privacy.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-06&engl=1}
}

% Journal article on a recommender approach that supports domain experts specifying data preparation rules in BARENTS.
@article {ART-2023-05,
   author = {Christoph Stach and Rebecca Eichler and Simone Schmidt},
   title = {{A Recommender Approach to Enable Effective and Efficient Self-Service Analytics in Data Lakes}},
   journal = {Datenbank-Spektrum},
   publisher = {Springer Nature},
   volume = {23},
   number = {2},
   pages = {123--132},
   type = {Article in Journal},
   month = jun,
   year = {2023},
   issn = {1618-2162},
   doi = {10.1007/s13222-023-00443-4},
   keywords = {Data Lake; Data Preparation; Data Pre-Processing; Data Refinement; Recommender; Self-Service Analytics},
   language = {English},
   cr-category = {H.2.7 Database Administration,
                   E.2 Data Storage Representations,
                   H.3.3 Information Search and Retrieval,
                   H.2.8 Database Applications},
   contact = {Senden Sie eine E-Mail an christoph.stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {As a result of the paradigm shift away from rather rigid data warehouses to
      general-purpose data lakes, fully flexible self-service analytics is made
      possible. However, this also increases the complexity for domain experts who
      perform these analyses, since comprehensive data preparation tasks have to be
      implemented for each data access. For this reason, we developed BARENTS, a
      toolset that enables domain experts to specify data preparation tasks as
      ontology rules, which are then applied to the data involved. Although our
      evaluation of BARENTS showed that it is a valuable contribution to self-service
      analytics, a major drawback is that domain experts do not receive any semantic
      support when specifying the rules. In this paper, we therefore address how a
      recommender approach can provide additional support to domain experts by
      identifying supplementary datasets that might be relevant for their analyses or
      additional data processing steps to improve data refinement. This recommender
      operates on the set of data preparation rules specified in BARENTS---i.e., the
      accumulated knowledge of all domain experts is factored into the data
      preparation for each new analysis. Evaluation results indicate that such a
      recommender approach further contributes to the practicality of BARENTS and
      thus represents a step towards effective and efficient self-service analytics
      in data lakes.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-05&engl=1}
}

% Journal article explaining AssistML, a concept to recommend ML solutions for predictive use cases.
% The author list uses the "Last, First" form so that the compound surname "Villanueva Zacarias" is kept together.
@article {ART-2023-04,
   author = {Villanueva Zacarias, Alejandro Gabriel and Reimann, Peter and Weber, Christian and Mitschang, Bernhard},
   title = {{AssistML: An Approach to Manage, Recommend and Reuse ML Solutions}},
   journal = {International Journal of Data Science and Analytics (JDSA)},
   publisher = {Springer Nature},
   type = {Article in Journal},
   month = jul,
   year = {2023},
   keywords = {Meta-learning; Machine learning; AutoML; Metadata; Recommender systems},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {The adoption of machine learning (ML) in organizations is characterized by the
      use of multiple ML software components. When building ML systems out of these
      software components, citizen data scientists face practical requirements which
      go beyond the known challenges of ML, e.g., data engineering or parameter
      optimization. They are expected to quickly identify ML system options that
      strike a suitable trade-off across multiple performance criteria. These options
      also need to be understandable for non-technical users. Addressing these
      practical requirements represents a problem for citizen data scientists with
      limited ML experience. This calls for a concept to help them identify suitable
      ML software combinations. Related work, e.g., AutoML systems, are not
      responsive enough or cannot balance different performance criteria. This paper
      explains how AssistML, a novel concept to recommend ML solutions, i.e.,
      software systems with ML models, can be used as an alternative for predictive
      use cases. Our concept collects and preprocesses metadata of existing ML
      solutions to quickly identify the ML solutions that can be reused in a new use
      case. We implement AssistML and evaluate it with two exemplary use cases.
      Results show that AssistML can recommend ML solutions in line with users'
      performance preferences in seconds. Compared to AutoML, AssistML offers citizen
      data scientists simpler, intuitively explained ML solutions in considerably
      less time. Moreover, these solutions perform similarly or even better than
      AutoML models.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-04&engl=1}
}

% Journal article proposing a meta-learning approach (ML2DAC) for algorithm and validity-index selection in clustering.
@article {ART-2023-03,
   author = {Dennis Treder-Tschechlov and Manuel Fritz and Holger Schwarz and Bernhard Mitschang},
   title = {{ML2DAC: Meta-Learning to Democratize AutoML for Clustering Analysis}},
   journal = {Proceedings of the ACM on Management of Data (SIGMOD)},
   publisher = {Association for Computing Machinery (ACM)},
   volume = {1},
   number = {2},
   pages = {1--26},
   type = {Article in Journal},
   month = jun,
   year = {2023},
   doi = {10.1145/3589289},
   language = {English},
   cr-category = {I.5.3 Pattern Recognition Clustering},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Analysts often struggle with the combined algorithm selection and
      hyperparameter optimization problem, a.k.a. CASH problem in literature.
      Typically, they execute several algorithms with varying hyperparameter settings
      to find configurations that show valuable results. Efficiently finding these
      configurations is a major challenge. In clustering analyses, analysts face the
      additional challenge to select a cluster validity index that allows them to
      evaluate clustering results in a purely unsupervised fashion. Many different
      cluster validity indices exist and each one has its benefits depending on the
      dataset characteristics. While experienced analysts might address these
      challenges using their domain knowledge and experience, especially novice
      analysts struggle with them. In this paper, we propose a new meta-learning
      approach to address these challenges. Our approach uses knowledge from past
      clustering evaluations to apply strategies that experienced analysts would
      exploit. In particular, we use meta-learning to (a) select a suitable
      clustering validity index, (b) efficiently select well-performing clustering
      algorithm and hyperparameter configurations, and (c) reduce the search space to
      suitable clustering algorithms. In the evaluation, we show that our approach
      significantly outperforms state-of-the-art approaches regarding accuracy and
      runtime.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-03&engl=1}
}

@article {ART-2023-02,
   author = {Vitali Hirsch and Peter Reimann and Dennis Treder-Tschechlov and Holger Schwarz and Bernhard Mitschang},
   title = {{Exploiting Domain Knowledge to address Class Imbalance and a Heterogeneous Feature Space in Multi-Class Classification}},
   journal = {International Journal on Very Large Data Bases (VLDB-Journal)},
   publisher = {Springer},
   type = {Article in Journal},
   month = {February},
   year = {2023},
   keywords = {Classification; Domain knowledge; Multi-class Imbalance; Heterogeneous feature space},
   language = {English},
   cr-category = {H.2.8 Database Applications},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Real-world data of multi-class classification tasks often show complex data
      characteristics that lead to a reduced classification performance. Major
      analytical challenges are a high degree of multi-class imbalance within data
      and a heterogeneous feature space, which increases the number and complexity of
      class patterns. Existing solutions to classification or data pre-processing
      only address one of these two challenges in isolation. We propose a novel
      classification approach that explicitly addresses both challenges of
      multi-class imbalance and heterogeneous feature space together. As main
      contribution, this approach exploits domain knowledge in terms of a taxonomy to
      systematically prepare the training data. Based on an experimental evaluation
      on both real-world data and several synthetically generated data sets, we show
      that our approach outperforms any other classification technique in terms of
      accuracy. Furthermore, it entails considerable practical benefits in real-world
      use cases, e.g., it reduces rework required in the area of product quality
      control.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-02&engl=1}
}

@article {ART-2023-01,
   author = {Christoph Stach},
   title = {{Data Is the New Oil--Sort of: A View on Why This Comparison Is Misleading and Its Implications for Modern Data Administration}},
   journal = {Future Internet},
   publisher = {MDPI},
   volume = {15},
   number = {2},
   pages = {1--49},
   type = {Article in Journal},
   month = {February},
   year = {2023},
   issn = {1999-5903},
   doi = {10.3390/fi15020071},
   keywords = {data characteristics; data administration; data refinement; reliability; security; privacy},
   language = {English},
   cr-category = {E.0 Data General,
                   H.3 Information Storage and Retrieval,
                   K.6.5 Security and Protection,
                   K.4.1 Computers and Society Public Policy Issues},
   ee = {https://www.mdpi.com/1999-5903/15/2/71/htm},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems},
   abstract = {Currently, data are often referred to as the oil of the 21st century. This
      comparison is not only used to express that the resource data are just as
      important for the fourth industrial revolution as oil was for the technological
      revolution in the late 19th century. There are also further similarities
      between these two valuable resources in terms of their handling. Both must
      first be discovered and extracted from their sources. Then, the raw materials
      must be cleaned, preprocessed, and stored before they can finally be delivered
      to consumers. Despite these undeniable similarities, however, there are
      significant differences between oil and data in all of these processing steps,
      making data a resource that is considerably more challenging to handle. For
      instance, data sources, as well as the data themselves, are heterogeneous,
      which means there is no one-size-fits-all data acquisition solution.
      Furthermore, data can be distorted by the source or by third parties without
      being noticed, which affects both quality and usability. Unlike oil, there is
      also no uniform refinement process for data, as data preparation should be
      tailored to the subsequent consumers and their intended use cases. With regard
      to storage, it has to be taken into account that data are not consumed when
      they are processed or delivered to consumers, which means that the data volume
      that has to be managed is constantly growing. Finally, data may be subject to
      special constraints in terms of distribution, which may entail individual
      delivery plans depending on the customer and their intended purposes. Overall,
      it can be concluded that innovative approaches are needed for handling the
      resource data that address these inherent challenges. In this paper, we
      therefore study and discuss the relevant characteristics of data making them
      such a challenging resource to handle. In order to enable appropriate data
      provisioning, we introduce a holistic research concept from data source to data
      sink that respects the processing requirements of data producers as well as the
      quality requirements of data consumers and, moreover, ensures a trustworthy
      data administration.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=ART-2023-01&engl=1}
}

@book {BOOK-2023-02,
   editor = {Christoph Stach and Cl{\'e}mentine Gritti},
   title = {{Security and Privacy in Blockchains and the IoT II}},
   address = {Basel, Beijing, Wuhan, Barcelona, Belgrade, Novi Sad, Cluj, Manchester},
   publisher = {MDPI},
   series = {Future Internet},
   pages = {480},
   type = {Book},
   month = {September},
   year = {2023},
   isbn = {978-3-0365-8772-1},
   doi = {10.3390/books978-3-0365-8773-8},
   keywords = {authentication; blockchain; demand-driven data provision; digital signatures; distributed ledger technology; encryption; Internet of Things; privacy-aware data processing; secure data management; smart things},
   language = {English},
   cr-category = {K.6.5 Security and Protection,
                   K.4.1 Computers and Society Public Policy Issues},
   ee = {https://www.mdpi.com/books/book/7885},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;
                  University of Stuttgart, Institute of Parallel and Distributed Systems},
   abstract = {In this age of data-driven transformation, where the fusion of blockchain
      technologies and the Internet of Things (IoT) is shaping the fabric of our
      digital society, the need for security and privacy has never been more
      important. This Special Issue delves into the intricate confluence of these two
      disruptive forces and provides a comprehensive overview of security and privacy
      aspects in this regard. Focusing on protection goals such as confidentiality,
      integrity, availability, and privacy, this compilation encapsulates the essence
      of these multi-layered challenges. Ranging from complex data-driven
      applications and smart services to novel approaches that enhance security and
      privacy in the context of blockchain technologies and the IoT, the research
      articles and literature reviews presented here offer a sophisticated mesh of
      insights. Innovative solutions are highlighted from a variety of perspectives,
      and challenges such as secure data transmission, confidential communication,
      and tamper-proof data storage are explored.
      
      In this way, this Special Issue is a beacon for practitioners, researchers, and
      technology enthusiasts. Developers seeking to harness the potential of
      blockchain technology and IoT find rich insights while users get a
      comprehensive overview of the latest research and trends. The symphony of
      interdisciplinary knowledge presented here creates a harmonious blend of theory
      and practice, highlighting the intricate interdependencies between
      technological advances and the need for security and privacy.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2023-02&engl=1}
}

@book {BOOK-2023-01,
   editor = {Christoph Stach},
   title = {{Security and Privacy in Blockchains and the IoT}},
   address = {Basel, Beijing, Wuhan, Barcelona, Belgrade, Manchester, Tokyo, Cluj, Tianjin},
   publisher = {MDPI},
   series = {Future Internet},
   pages = {166},
   type = {Book},
   month = {January},
   year = {2023},
   isbn = {978-3-0365-6251-3},
   doi = {10.3390/books978-3-0365-6252-0},
   keywords = {Blockchain; IoT; Confidentiality; Integrity; Authenticity; Access Control; Security; Privacy; Efficient Blockchain Technologies; Trustworthy Smart Services; Privacy-Aware Machine Learning; Data Protection Laws},
   language = {English},
   cr-category = {K.6.5 Security and Protection,
                   K.4.1 Computers and Society Public Policy Issues},
   ee = {https://www.mdpi.com/books/book/6686},
   contact = {Senden Sie eine E-Mail an Christoph.Stach@ipvs.uni-stuttgart.de.},
   department = {University of Stuttgart, Institute of Parallel and Distributed Systems, Applications of Parallel and Distributed Systems;
                  University of Stuttgart, Institute of Parallel and Distributed Systems},
   abstract = {In this day and age, data have become an immensely valuable resource. They are
      the key driver that puts the smart into smart services. This is fundamentally
      fueled by two technological achievements, namely the Internet of Things (IoT),
      which enables continuous and comprehensive collection of all kinds of data, and
      blockchain technologies, which provide secure data management and exchange. In
      addition to those information security measures, however, data privacy
      solutions are also required to protect the involved sensitive data. In this
      book, eight research papers address security and privacy challenges when
      dealing with blockchain technologies and the IoT. Concerning the IoT, solutions
      are presented on how IoT group communication can be secured and how trust
      within IoT applications can be increased. In the context of blockchain
      technologies, approaches are introduced on how query processing capabilities
      can be enhanced and how a proof-of-work consensus protocol can be efficiently
      applied in IoT environments. Furthermore, it is discussed how blockchain
      technologies can be used in IoT environments to control access to confidential
      IoT data as well as to enable privacy-aware data sharing. Finally, two reviews
      give an overview of the state of the art in in-app activity recognition based
      on convolutional neural networks and the prospects for blockchain technology
      applications in ambient assisted living.},
   url = {http://www2.informatik.uni-stuttgart.de/cgi-bin/NCSTRL/NCSTRL_view.pl?id=BOOK-2023-01&engl=1}
}

