Pre-prints (under review)

@article{dionis2024andps, title={Sensorimotor Learning with Stability Guarantees via Autonomous Neural Dynamic Policies}, author={Dionis Totsila*, Konstantinos Chatzilygeroudis*, Valerio Modugno, Denis Hadjivelichkov, and Dimitrios Kanoulas}, year={2024}, journal={Submitted to IEEE Robotics and Automation Letters (RA-L)}, url={https://nosalro.github.io/andps/}, video={https://www.youtube.com/watch?v=ZI9-TLSovpQ}, code={https://github.com/NOSALRO/andps}, abstract={State-of-the-art sensorimotor learning algorithms, either in the context of reinforcement learning or imitation learning, offer policies that can often produce unstable behaviors, damaging the robot and/or the environment. Moreover, it is very difficult to interpret the optimized controller and analyze its behavior and/or performance. Traditional robot learning, on the contrary, relies on dynamical system-based policies that can be analyzed for stability/safety. Such policies, however, are neither flexible nor generic and usually work only with proprioceptive sensor states. In this work, we bridge the gap between generic neural network policies and dynamical system-based policies, and we introduce Autonomous Neural Dynamic Policies (ANDPs) that: (a) are based on autonomous dynamical systems, (b) always produce asymptotically stable behaviors, and (c) are more flexible than traditional stable dynamical system-based policies. ANDPs are fully differentiable, flexible, generic policies that can be used in both imitation learning and reinforcement learning setups, while ensuring asymptotic stability. Through several experiments, we explore the flexibility and capacity of ANDPs in a range of imitation learning tasks, including experiments with image observations. The results show that ANDPs combine the benefits of both neural network-based and dynamical system-based methods.} }

Peer-reviewed Journal Papers

@article{Chatzilygeroudis2024, title={RobotDART: a versatile robot simulator for robotics and machine learning researchers}, author={Konstantinos Chatzilygeroudis, Dionis Totsila, and Jean-Baptiste Mouret}, journal={Journal of Open Source Software}, publisher={The Open Journal}, year={2024}, volume={9}, number={102}, pages={6771}, doi={10.21105/joss.06771}, url={https://doi.org/10.21105/joss.06771}, code={https://nosalro.github.io/robot_dart} }

@article{syriopoulos2024optimizing, title={Optimizing Doubly Stochastic Matrices for Average Consensus through Swarm and Evolutionary Algorithms}, author={Panos Syriopoulos, Konstantinos Chatzilygeroudis, Nektarios Kalampalikis, and Michael Vrahatis}, year={2024}, journal={Annals of Mathematics and Artificial Intelligence}, url={https://link.springer.com/article/10.1007/s10472-023-09912-8}, abstract={Doubly-stochastic matrices play a vital role in modern applications of complex networks such as tracking and decentralized state estimation, and coordination and control of autonomous agents. A central theme in all of the above is consensus, that is, nodes reaching agreement about the value of an underlying variable (e.g. the state of the environment). Despite the fact that complex networks have been studied thoroughly, the communication graphs are usually described by symmetric matrices due to their advantageous theoretical properties. We do not yet have methods for optimizing generic doubly-stochastic matrices. In this paper, we propose a novel formulation and framework, EvoDSM, for achieving fast linear distributed averaging by: (a) optimizing the weights of a fixed graph topology, and (b) optimizing for the topology itself. We are concerned with graphs that can be described by positive doubly-stochastic matrices. Our method relies on swarm and evolutionary optimization algorithms, and our experimental results and analysis showcase that our method (1) achieves comparable performance with traditional methods for symmetric graphs, (2) is applicable to non-symmetric network structures and edge weights, and (3) is scalable and can operate effectively with moderately large graphs without engineering overhead.} }

@article{allard2022telo, title={Online Damage Recovery for Physical Robots with Hierarchical Quality-Diversity}, author={Maxime Allard, Simón C. Smith, Konstantinos Chatzilygeroudis, Bryan Lim, and Antoine Cully}, year={2023}, journal={ACM Transactions on Evolutionary Learning and Optimization}, url={https://arxiv.org/abs/2210.09918}, abstract={In real-world environments, robots need to be resilient to damage and robust to unforeseen scenarios. Quality-Diversity (QD) algorithms have been successfully used to make robots adapt to damage in seconds by leveraging a diverse set of learned skills. A high diversity of skills increases the chances that a robot will succeed at overcoming new situations, since there are more potential alternatives to solve a new task. However, finding and storing a large behavioural diversity of multiple skills often leads to an increase in computational complexity. Furthermore, robot planning in a large skill space is an additional challenge that arises with an increased number of skills. Hierarchical structures can help to reduce this search and storage complexity by breaking down skills into primitive skills. In this paper, we extend the analysis of the Hierarchical Trial and Error algorithm, which uses a hierarchical behavioural repertoire to learn diverse skills and leverages them to make the robot adapt quickly in the physical world. We show that the hierarchical decomposition of skills enables the robot to learn more complex behaviours while keeping the learning of the repertoire tractable. Experiments with a hexapod robot both in simulation and the physical world show that our method solves a maze navigation task with up to 20% and 43% fewer actions, respectively, than the best baselines, while having 78% fewer complete failures.} }

@article{khadivar2023self, title={Self-Correcting Quadratic Programming-Based Robot Control}, author={Farshad Khadivar*, Konstantinos Chatzilygeroudis*, and Aude Billard}, year={2023}, journal={IEEE Transactions on Systems, Man, and Cybernetics: Systems}, url={https://infoscience.epfl.ch/record/301975?ln=en}, video={https://www.youtube.com/watch?v=cA-_SKoO_9c&ab_channel=KonstantinosChatzilygeroudis}, abstract={Quadratic Programming (QP)-based controllers allow many robotic systems, such as humanoids, to successfully undertake complex motions and interactions. However, these approaches rely heavily on adequately capturing the underlying model of the environment and the robot's dynamics. This assumption, nevertheless, is rarely satisfied, and we usually turn to well-tuned end-effector PD controllers to compensate for model mismatches. In this paper, we propose to augment traditional QP-based controllers with a learned residual inverse dynamics model and an adaptive control law that adjusts the QP online to account for model uncertainties and unforeseen disturbances. In particular, we propose (i) learning a residual inverse dynamics model using a Gaussian Process and linearizing it so that it can be incorporated inside the QP-control optimization procedure, and (ii) a novel combination of adaptive control and QP-based methods that avoids the manual tuning of end-effector PID controllers and achieves faster convergence in learning the residual dynamics model. In simulation, we extensively evaluate our method in several robotic scenarios, ranging from a 7-DoF manipulator tracking a trajectory to a humanoid robot performing a waving motion for which the model used by the controller and the one used in the simulated world do not match (unmodeled dynamics). Finally, we also validate our approach in physical robotic scenarios where a 7-DoF robotic arm performs tasks where the model of the environment (mass, friction coefficients, etc.) is not fully known.} }

@article{tsinganos2022behavior, title={Behavior Policy Learning: Learning Multi-Stage Tasks via Solution Sketches and Model-Based Controllers}, author={Konstantinos Tsinganos*, Konstantinos Chatzilygeroudis*, Denis Hadjivelichkov, Theodoros Komninos, Evangelos Dermatas, and Dimitrios Kanoulas}, journal={Frontiers in Robotics and AI}, year={2022}, url={https://www.frontiersin.org/articles/10.3389/frobt.2022.974537/full}, video={https://www.youtube.com/watch?v=5ueUBvq65O4}, abstract={Multi-stage tasks are a challenge for reinforcement learning methods, and require either specific task knowledge (e.g., task segmentation) or a big amount of interaction time to be learned. In this paper, we propose Behavior Policy Learning (BPL), which effectively combines 1) only a few solution sketches, that is, demonstrations without the actions, only the states, 2) model-based controllers, and 3) simulations to effectively solve multi-stage tasks without strong knowledge about the underlying task. Our main intuition is that solution sketches alone can provide strong data for learning a high-level trajectory by imitation, and model-based controllers can be used to follow this trajectory (we call it behavior) effectively. Finally, we utilize robotic simulations to further improve the policy and make it robust in a Sim2Real style. We evaluate our method in simulation with a robotic manipulator that has to perform two tasks with variations: 1) grasp a box and place it in a basket, and 2) re-place a book on a different level within a bookcase. We also validate the Sim2Real capabilities of our method by performing real-world experiments and realistic simulated experiments where the objects are tracked through an RGB-D camera for the first task.} }

@article{paul2018robustrl, title={Robust Reinforcement Learning with Bayesian Optimisation and Quadrature}, author={Supratik Paul, Konstantinos Chatzilygeroudis, Kamil Ciosek, Jean-Baptiste Mouret, Michael Osborne, and Shimon Whiteson}, year={2020}, journal={Journal of Machine Learning Research (JMLR), Special Issue on Bayesian Optimization}, url={https://jmlr.org/papers/volume21/18-216/18-216.pdf}, video={https://www.youtube.com/watch?v=R8Ss-dhDCmo&feature=youtu.be&ab_channel=Jean-BaptisteMouret}, abstract={Bayesian optimisation has been successfully applied to a variety of reinforcement learning problems. However, the traditional approach for learning optimal policies in simulators does not utilise the opportunity to improve learning by adjusting certain environment variables: state features that are unobservable and randomly determined by the environment in a physical setting but are controllable in a simulator. This article considers the problem of finding a robust policy while taking into account the impact of environment variables. We present alternating optimisation and quadrature (ALOQ), which uses Bayesian optimisation and Bayesian quadrature to address such settings. We also present transferable ALOQ (TALOQ), for settings where simulator inaccuracies lead to difficulty in transferring the learnt policy to the physical system. We show that our algorithms are robust to the presence of significant rare events, which may not be observable under random sampling but play a substantial role in determining the optimal policy. Experimental results across different domains show that our algorithms learn robust policies efficiently.} }

@article{chatzilygeroudis2019benchmark, title={Benchmark for Bimanual Robotic Manipulation of Semi-deformable Objects}, author={Konstantinos Chatzilygeroudis, Bernardo Fichera, Ilaria Lauzana, Fanjun Bu, Kunpeng Yao, Farshad Khadivar, and Aude Billard}, year={2020}, journal={Robotics and Automation Letters (RA-L): Special Issue on Benchmarking Protocols for Robotic Manipulation}, code={https://github.com/epfl-lasa/sahr_benchmark}, url={https://www.epfl.ch/labs/lasa/sahr/benchmark/} }

@article{sanchez2019benchmark, title={Benchmark for Human-to-Robot Handovers of Unseen Containers with Unknown Filling}, author={Ricardo Sanchez-Matilla*, Konstantinos Chatzilygeroudis*, Apostolos Modas, Nuno Ferreira Duarte, Alessio Xompero, Pascal Frossard, Aude Billard, and Andrea Cavallaro}, year={2020}, journal={Robotics and Automation Letters (RA-L): Special Issue on Benchmarking Protocols for Robotic Manipulation}, code={https://github.com/CORSMAL/Benchmark}, url={http://corsmal.eecs.qmul.ac.uk/benchmark.html} }

@article{chatzilygeroudis2018survey, title={A survey on policy search algorithms for learning robot controllers in a handful of trials}, author={Konstantinos Chatzilygeroudis, Vassilis Vassiliades, Freek Stulp, Sylvain Calinon, and Jean-Baptiste Mouret}, year={2019}, journal={IEEE Transactions on Robotics (T-RO)}, url={https://arxiv.org/abs/1807.02303}, abstract={Most policy search algorithms require thousands of training episodes to find an effective policy, which is often infeasible with a physical robot. This survey article focuses on the extreme other end of the spectrum: how can a robot adapt with only a handful of trials (a dozen) and a few minutes? By analogy with the word "big-data", we refer to this challenge as "micro-data reinforcement learning". We show that a first strategy is to leverage prior knowledge on the policy structure (e.g., dynamic movement primitives), on the policy parameters (e.g., demonstrations), or on the dynamics (e.g., simulators). A second strategy is to create data-driven surrogate models of the expected reward (e.g., Bayesian optimization) or the dynamical model (e.g., model-based policy search), so that the policy optimizer queries the model instead of the real system. Overall, all successful micro-data algorithms combine these two strategies by varying the kind of model and prior knowledge. The current scientific challenges essentially revolve around scaling up to complex robots (e.g., humanoids), designing generic priors, and optimizing the computing time.} }

@article{chatzilygeroudis2018resetfree, title={{Reset-free Trial-and-Error Learning for Robot Damage Recovery}}, author={Konstantinos Chatzilygeroudis, Vassilis Vassiliades, and Jean-Baptiste Mouret}, journal={Robotics and Autonomous Systems}, year={2018}, url={https://arxiv.org/abs/1610.04213}, video={https://youtu.be/IqtyHFrb3BU}, code={https://github.com/resibots/chatzilygeroudis_2018_rte}, abstract={The high probability of hardware failures prevents many advanced robots (e.g., legged robots) from being confidently deployed in real-world situations (e.g., post-disaster rescue). Instead of attempting to diagnose the failures, robots could adapt by trial-and-error in order to be able to complete their tasks. In this situation, damage recovery can be seen as a Reinforcement Learning (RL) problem. However, the best RL algorithms for robotics require the robot and the environment to be reset to an initial state after each episode, that is, the robot is not learning autonomously. In addition, most of the RL methods for robotics do not scale well with complex robots (e.g., walking robots) and either cannot be used at all or take too long to converge to a solution (e.g., hours of learning). In this paper, we introduce a novel learning algorithm called "Reset-free Trial-and-Error" (RTE) that (1) breaks the complexity by pre-generating hundreds of possible behaviors with a dynamics simulator of the intact robot, and (2) allows complex robots to quickly recover from damage while completing their tasks and taking the environment into account. We evaluate our algorithm on a simulated wheeled robot, a simulated six-legged robot, and a real six-legged walking robot that are damaged in several ways (e.g., a missing leg, a shortened leg, faulty motor, etc.) and whose objective is to reach a sequence of targets in an arena. Our experiments show that the robots can recover most of their locomotion abilities in an environment with obstacles, and without any human intervention.} }

@article{cully2018limbo, title={Limbo: A Flexible High-performance Library for Gaussian Processes modeling and Data-Efficient Optimization}, author={Antoine Cully, Konstantinos Chatzilygeroudis, Federico Allocati, and Jean-Baptiste Mouret}, year={2018}, journal={The Journal of Open Source Software}, publisher={The Open Journal}, url={http://joss.theoj.org/papers/10.21105/joss.00545}, code={https://github.com/resibots/limbo}, abstract={Limbo (LIbrary for Model-Based Optimization) is an open-source C++11 library for Gaussian Processes and data-efficient optimization (e.g., Bayesian optimization) that is designed to be both highly flexible and very fast. It can be used as a state-of-the-art optimization library or to experiment with novel algorithms with "plugin" components. Limbo is currently mostly used for data-efficient policy search in robot learning and online adaptation because computation time matters when using the low-power embedded computers of robots. For example, Limbo was the key library to develop a new algorithm that allows a legged robot to learn a new gait after mechanical damage in about 10-15 trials (2 minutes), and a 4-DOF manipulator to learn neural network policies for goal reaching in about 5 trials. The implementation of Limbo follows a policy-based design that leverages C++ templates: this allows it to be highly flexible without the cost induced by classic object-oriented designs (cost of virtual functions). The regression benchmarks show that the query time of Limbo's Gaussian processes is several orders of magnitude better than that of GPy (a state-of-the-art Python library for Gaussian processes) for a similar accuracy (the learning time highly depends on the optimization algorithm chosen to optimize the hyper-parameters). The black-box optimization benchmarks demonstrate that Limbo is about 2 times faster than BayesOpt (a C++ library for data-efficient optimization) for a similar accuracy and data-efficiency. In practice, changing one of the components of the algorithms in Limbo (e.g., changing the acquisition function) usually requires changing only a template definition in the source code. This design allows users to rapidly experiment and test new ideas while keeping the software as fast as specialized code. Limbo takes advantage of multi-core architectures to parallelize the internal optimization processes (optimization of the acquisition function, optimization of the hyper-parameters of a Gaussian process) and it vectorizes many of the linear algebra operations (via the Eigen 3 library and optional bindings to Intel's MKL). The library is distributed under the CeCILL-C license via a GitHub repository. The code is standard-compliant but it is currently mostly developed for GNU/Linux and Mac OS X with both the GCC and Clang compilers. New contributors can rely on a full API reference, while their developments are checked via a continuous integration platform (automatic unit-testing routines). Limbo is currently used in the ERC project ResiBots, which is focused on data-efficient trial-and-error learning for robot damage recovery, and in the H2020 project PAL, which uses social robots to help cope with diabetes. It has been instrumental in many scientific publications since 2015.} }

@article{vassiliades2017scaling, title={Using Centroidal Voronoi Tessellations to Scale Up the Multi-dimensional Archive of Phenotypic Elites Algorithm}, author={Vassilis Vassiliades, Konstantinos Chatzilygeroudis, and Jean-Baptiste Mouret}, journal={IEEE Transactions on Evolutionary Computation}, year={2017}, url={https://arxiv.org/abs/1610.05729}, code={https://github.com/resibots/vassiliades_2017_cvt_map_elites}, abstract={The recently introduced Multi-dimensional Archive of Phenotypic Elites (MAP-Elites) is an evolutionary algorithm capable of producing a large archive of diverse, high-performing solutions in a single run. It works by discretizing a continuous feature space into unique regions according to the desired discretization per dimension. While simple, this algorithm has a main drawback: it cannot scale to high-dimensional feature spaces since the number of regions increases exponentially with the number of dimensions. In this paper, we address this limitation by introducing a simple extension of MAP-Elites that has a constant, pre-defined number of regions irrespective of the dimensionality of the feature space. Our main insight is that methods from computational geometry could partition a high-dimensional space into well-spread geometric regions. In particular, our algorithm uses a centroidal Voronoi tessellation (CVT) to divide the feature space into a desired number of regions; it then places every generated individual in its closest region, replacing a less fit one if the region is already occupied. We demonstrate the effectiveness of the new "CVT-MAP-Elites" algorithm in high-dimensional feature spaces through comparisons against MAP-Elites in maze navigation and hexapod locomotion tasks.} }
Peer-reviewed Book Chapters

@incollection{chatzilygeroudis2020qd, title={Quality-Diversity Optimization: a novel branch of stochastic optimization}, author={Konstantinos Chatzilygeroudis, Antoine Cully, Vassilis Vassiliades, and Jean-Baptiste Mouret}, editor={Panos Pardalos, Michael Vrahatis, and Varvara Rasskazova}, booktitle={Black Box Optimization, Machine Learning, and No-Free Lunch Theorems}, publisher={Springer}, url={https://arxiv.org/abs/2012.04322}, year={2021} }

@incollection{chatzilygeroudis2020ml, title={Machine Learning Basics}, author={Konstantinos Chatzilygeroudis, Ioannis Hatzilygeroudis, and Isidoros Perikos}, editor={Andreas Komninos, Parisa Eslambolchilar, and Mark Dunlop}, booktitle={Intelligent Computing for Interactive System Design: Statistics, Digital Signal Processing and Machine Learning in practice}, publisher={ACM}, year={2021} }
Peer-reviewed Conference Papers

@inproceedings{tsikelis2024gait, title={Gait Optimization for Legged Systems Through Mixed Distribution Cross-Entropy Optimization}, author={Ioannis Tsikelis* and Konstantinos Chatzilygeroudis*}, booktitle={IEEE-RAS International Conference on Humanoid Robots (Humanoids)}, year={2024}, url={https://nosalro.github.io/cregopt/}, video={https://www.youtube.com/watch?v=N0bk9KOIHdI&ab_channel=NOSALRO}, abstract={Legged robotic systems can play an important role in real-world applications due to their superior load-bearing capabilities, enhanced autonomy, and effective navigation on uneven terrain. They offer an optimal trade-off between mobility and payload capacity, excelling in diverse environments while maintaining efficiency in transporting heavy loads. However, planning and optimizing gaits and gait sequences for these robots presents significant challenges due to the complexity of their dynamic motion and the numerous optimization variables involved. Traditional trajectory optimization methods address these challenges by formulating the problem as an optimization task, aiming to minimize cost functions and to automatically discover contact sequences. Despite their structured approach, optimization-based methods face substantial difficulties, particularly because such formulations result in highly nonlinear and difficult-to-solve problems. To address these limitations, we propose CrEGOpt, a bi-level optimization method that combines traditional trajectory optimization with a black-box optimization scheme. At the higher level, CrEGOpt employs the Mixed Distribution Cross-Entropy Method to optimize both the gait sequence and the phase durations, thus simplifying the lower-level trajectory optimization problem. This approach allows for fast solutions of complex gait optimization problems. Extensive evaluation in simulated environments demonstrates that CrEGOpt can find solutions for biped, quadruped, and hexapod robots in under 10 seconds. This novel bi-level optimization scheme offers a promising direction for future research in automatic contact scheduling.} }

@inproceedings{asimakopoulos2024lion, title={Effective Kinodynamic Planning and Exploration through Quality Diversity and Trajectory Optimization}, author={Konstantinos Asimakopoulos, Aristeidis Androutsopoulos, Michael Vrahatis, and Konstantinos Chatzilygeroudis}, year={2024}, booktitle={The 18th Learning and Intelligent Optimization Conference (LION)}, url={./files/LION18.pdf}, abstract={Efficient and rapid kinodynamic planning is crucial for numerous real-world robotics applications. Various methods have been proposed to address this challenge, primarily falling into two categories: (a) randomized planners and (b) trajectory optimization utilizing simplified models and numerical optimization. Randomized planners such as RRT and PRM excel in exploring the state space, while trajectory optimization methods, like direct collocation, are adept at discovering optimal trajectories within well-defined spaces. We aim to achieve effective and efficient kinodynamic planning and exploration by integrating evolutionary algorithms (Quality-Diversity) with trajectory optimization. Our preliminary experiments showcase that, using the proposed methodology, we get the best of both worlds in two simulated experiments.} }

@inproceedings{chatzilygeroudis2023evolving, title={Evolving Dynamic Locomotion Policies in Minutes}, author={Konstantinos Chatzilygeroudis, Constantinos Tsakonas, and Michael Vrahatis}, year={2023}, booktitle={The Fourteenth International Conference on Information, Intelligence, Systems and Applications (IISA 2023)}, url={./files/IISA2023-EvoLoco.pdf}, video={https://www.youtube.com/watch?v=VdyUlAAWMzQ}, abstract={Many effective evolutionary methods have been proposed that allow robots to learn how to walk. Most of the proposed methods have one or more of the following drawbacks: (a) utilization of hand-designed open-loop policies that cannot scale to different robots, and/or (b) long wall-clock time requirements due to sample inefficiency and simulation costs, a fact that limits the practical usage of those algorithms. In this paper, we propose to combine (a) a simplified model for locomotion dynamics and (b) the effectiveness of Quality-Diversity algorithms, and we propose a novel algorithm that is able to evolve, in less than an hour on a standard computer, generic (e.g. neural network) reactive locomotion policies that operate with a local view of the world. Our approach makes it possible to generate in a few minutes reactive policies for locomotion that can perform dynamic motions like jumps. We also present preliminary results of transferring the behaviors to realistic simulators using a whole-body inverse kinematics solver and a joint impedance controller.} }

@inproceedings{tsakoans2023effective, title={Effective Skill Learning via Autonomous Goal Representation Learning}, author={Constantinos Tsakonas and Konstantinos Chatzilygeroudis}, year={2023}, booktitle={The Fourteenth International Conference on Information, Intelligence, Systems and Applications (IISA 2023)}, url={./files/IISA2023-AGRL.pdf}, video={https://www.youtube.com/watch?v=x-j5mid6jxM}, abstract={A long-standing goal of robotics researchers is to develop robots that are able to develop in an autonomous, open-ended manner through lifelong learning and interactions. If we are to see robots learning in an autonomous and open-ended manner, we need to develop methods for incremental and autonomous skill discovery and trial-and-error learning. In other words, we want our robots to be able to autonomously select their goals according to their current capabilities and learn controllers or policies to achieve those goals. In this paper, we take a step towards solving this challenge and propose a novel pipeline, called AGRL, that effectively combines deterministic simulations, Variational Auto-Encoders (VAEs) and Reinforcement Learning (RL), and enables robots to learn goal-conditioned policies suited to their capabilities. Our main intuition is that we can use effective exploration strategies in order to learn a good goal representation and distribution, and then use this distribution to generate effective and reachable goals for fast skill learning. We extensively evaluate the proposed method in simulation with a 7-DOF manipulator and a differential drive mobile robot.} }

@inproceedings{chatzilygeroudis2023lion, title={Fast and Robust Constrained Optimization via Evolutionary and Quadratic Programming}, author={Konstantinos Chatzilygeroudis and Michael Vrahatis}, year={2023}, booktitle={The 17th Learning and Intelligent Optimization Conference (LION)}, url={./files/LION17.pdf}, abstract={Many efficient and effective approaches have been proposed in the evolutionary computation literature for solving constrained optimization problems. Most of the approaches assume that both the objective function and the constraints are black-box functions, while a few of them can take advantage of the gradient information. On the other hand, when the gradient information is available, the most versatile approaches are arguably the ones coming from the numerical optimization literature. Perhaps the most popular methods in this field are sequential quadratic programming and interior point. Despite their success, those methods require accurate gradients and usually require a well-shaped initialization to work as expected. In the paper at hand, a novel hybrid method, named UPSO-QP, is presented that is based on particle swarm optimization and borrows ideas from the numerical optimization literature and sequential quadratic programming approaches. The proposed method is evaluated on numerous constrained optimization tasks, from simple low-dimensional problems to high-dimensional realistic trajectory optimization scenarios, and we showcase that it is able to outperform other evolutionary algorithms both in terms of convergence speed as well as performance, while also being robust to noisy gradients and bad initialization.} }

@inproceedings{mayr2022skill, title={Skill-based Multi-objective Reinforcement Learning of Industrial Robot Tasks with Planning and Knowledge Integration}, author={Matthias Mayr, Faseeh Ahmad, Konstantinos Chatzilygeroudis, Luigi Nardi, and Volker Krueger}, year={2022}, booktitle={IEEE International Conference on Robotics and Biomimetics (ROBIO)}, url={https://sites.google.com/ulund.org/SkiREIL}, video={./files/ROBIO2022.mp4}, code={https://github.com/matthias-mayr/SkiREIL}, abstract={In modern industrial settings with small batch sizes it should be easy to set up a robot system for a new task. Strategies exist, e.g. the use of skills, but when it comes to handling forces and torques, these systems often fall short. We introduce an approach that provides a combination of task-level planning with targeted learning of scenario-specific parameters for skill-based systems. We propose the following pipeline: (1) the user provides a task goal in the planning language PDDL, (2) a plan (i.e., a sequence of skills) is generated and the learnable parameters of the skills are automatically identified. An operator then chooses (3) reward functions and hyperparameters for the learning process. Two aspects of our methodology are critical: (a) learning is tightly integrated with a knowledge framework to support symbolic planning and to provide priors for learning, and (b) the use of multi-objective optimization. This can help to balance key performance indicators (KPIs) such as safety and task performance since they can often affect each other. We adopt a multi-objective Bayesian optimization approach and learn entirely in simulation. We demonstrate the efficacy and versatility of our approach by learning skill parameters for two different contact-rich tasks. We show their successful execution on a real 7-DOF KUKA-iiwa manipulator and outperform the manual parameterization by human robot operators.} }

@inproceedings{mayr2022learning, title={Learning Skill-based Industrial Robot Tasks with User Priors}, author={Matthias Mayr, Carl Hvarfner, Konstantinos Chatzilygeroudis, Luigi Nardi, and Volker Krueger}, year={2022}, booktitle={IEEE International Conference on Automation Science and Engineering (CASE)}, url={https://arxiv.org/abs/2208.01605}, abstract={Robot skills systems are meant to reduce robot setup time for new manufacturing tasks. Yet, for dexterous, contact-rich tasks, it is often difficult to find the right skill parameters. One strategy is to learn these parameters by allowing the robot system to learn directly on the task. For a learning problem, a robot operator can typically specify the type and range of values of the parameters. Nevertheless, given their prior experience, robot operators should be able to help the learning process further by providing educated guesses about where in the parameter space potential optimal solutions could be found. Interestingly, such prior knowledge is not exploited in current robot learning frameworks. We introduce an approach that combines user priors and Bayesian optimization to allow fast optimization of robot industrial tasks at robot deployment time. We evaluate our method on three tasks that are learned in simulation as well as on two tasks that are learned directly on a real robot system. Additionally, we transfer knowledge from the corresponding simulation tasks by automatically constructing priors from well-performing configurations for learning on the real system. To handle potentially contradicting task objectives, the tasks are modeled as multi-objective problems. Our results show that operator priors, both user-specified and transferred, vastly accelerate the discovery of rich Pareto fronts, and typically produce final performance far superior to proposed baselines.} }

@inproceedings{allard2022gecco, title={Hierarchical Quality-Diversity for Online Damage Recovery (Best Paper Award at CS Track)}, author={Maxime Allard, Simón C. Smith, Konstantinos Chatzilygeroudis, and Antoine Cully}, year={2022}, booktitle={The Genetic and Evolutionary Computation Conference (GECCO)}, url={https://arxiv.org/abs/2204.05726}, abstract={Adaptation capabilities, like damage recovery, are crucial for the deployment of robots in complex environments. Several works have demonstrated that using repertoires of pre-trained skills can enable robots to adapt to unforeseen mechanical damages in a few minutes. These adaptation capabilities are directly linked to the behavioural diversity in the repertoire. The more alternatives the robot has to execute a skill, the better the chances that it can adapt to a new situation. However, solving complex tasks, like maze navigation, usually requires multiple different skills. Finding a large behavioural diversity for these multiple skills often leads to an intractable exponential growth of the number of required solutions. In this paper, we introduce the Hierarchical Trial and Error algorithm, which uses a hierarchical behavioural repertoire to learn diverse skills and leverages them to make the robot more adaptive to different situations. We show that the hierarchical decomposition of skills enables the robot to learn more complex behaviours while keeping the learning of the repertoire tractable. The experiments with a hexapod robot show that our method solves maze navigation tasks with 20% fewer actions in the most challenging scenarios than the best baseline, while having 57% fewer complete failures.} }

@inproceedings{dimitropoulos2022raad, title={A Brief Survey of Sim2Real Methods for Robot Learning}, author={Konstantinos Dimitropoulos, Ioannis Hatzilygeroudis, and Konstantinos Chatzilygeroudis}, year={2022}, booktitle={31st International Conference on Robotics in Alpe-Adria-Danube Region (RAAD)}, url={https://link.springer.com/chapter/10.1007/978-3-031-04870-8_16}, abstract={Simulation has been crucial for robotics research almost from the beginning of the field's existence. While simulation has been widely used for education, testing, and prototyping, only very recently has the robotics community attempted to transfer behaviors learned in simulation to the real world (a process usually referred to as Sim2Real). Those attempts have opened up a novel research direction that has produced some exciting results that were previously thought impossible to achieve. In this paper, we attempt to give a quick overview of the most promising Simulation-To-Reality (Sim2Real) methods, results, and directions.} }

@inproceedings{mayr2021iros, title={Learning of Parameters in Behavior Trees for Movement Skills}, author={Matthias Mayr, Konstantinos Chatzilygeroudis, Faseeh Ahmad, Luigi Nardi, and Volker Krueger}, year={2021}, booktitle={IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, url={./files/IROS2021.pdf}, video={./files/IROS2021.mp4}, abstract={Reinforcement Learning (RL) is a powerful mathematical framework that allows robots to learn complex skills by trial-and-error. Despite numerous successes in many applications, RL algorithms still require thousands of trials to converge to high-performing policies, can produce dangerous behaviors while learning, and the optimized policies (usually modeled as neural networks) give almost zero explanation when they fail to perform the task. For these reasons, the adoption of RL in industrial settings is not common. Behavior Trees (BTs), on the other hand, can provide a policy representation that a) supports modular and composable skills, b) allows for easy interpretation of the robot actions, and c) provides an advantageous low-dimensional parameter space. In this paper, we present a novel algorithm that can learn the parameters of a BT policy in simulation and then generalize to the physical robot without any additional training. We leverage a physical simulator with a digital twin of our workstation, and optimize the relevant parameters with a black-box optimizer. We showcase the efficacy of our method with a 7-DOF KUKA-iiwa manipulator in a task that includes obstacle avoidance and a contact-rich insertion (peg-in-hole), in which our method outperforms the baselines.} }

@inproceedings{chatzilygeroudis2021lion, title={Feature Selection in single-cell RNA-seq data via a Genetic Algorithm}, author={Konstantinos Chatzilygeroudis, Aristidis Vrahatis, Sotiris Tasoulis, and Michael Vrahatis}, year={2021}, booktitle={The 15th Learning and Intelligent Optimization Conference (LION)}, url={./files/LION15.pdf}, abstract={Big data methods prevail in the biomedical domain leading to effective and scalable data-driven approaches. Biomedical data are known for their ultra-high dimensionality, especially the ones coming from molecular biology experiments. This property is also included in the emerging technique of single-cell RNA-sequencing (scRNA-seq), where we obtain sequence information from individual cells. A reliable way to uncover their complexity is by using Machine Learning approaches, including dimensionality reduction and feature selection methods. Although the former has seen remarkable progress on scRNA-seq data, only the latter can offer deeper interpretability at the gene level since it highlights the dominant gene features in the given data. Towards tackling this challenge, we propose a feature selection framework that utilizes genetic optimization principles and identifies low-dimensional combinations of gene lists in order to enhance the classification performance of any off-the-shelf classifier (e.g., LDA or SVM). Our intuition is that by identifying an optimal genes subset, we can enhance the prediction power of scRNA-seq data even if these genes are unrelated to each other. We showcase our proposed framework's effectiveness in two real scRNA-seq experiments with gene dimensions up to 36708. Our framework can identify very low-dimensional subsets of genes (less than 200) while boosting the classifiers' performance. Finally, we provide a biological interpretation of the selected genes, thus providing evidence of our method's utility towards explainable artificial intelligence.} }

@inproceedings{duarte2020human, title={From human action understanding to robot action execution: how the physical properties of handled objects modulate non-verbal cues}, author={Nuno Ferreira Duarte, Konstantinos Chatzilygeroudis, José Santos-Victor, and Aude Billard}, year={2020}, booktitle={International Conference on Development and Learning and Epigenetic Robotics (ICDL-EpiRob)}, url={./files/ICDL2020.pdf}, video={./files/ICDL2020.mp4}, abstract={Humans manage to communicate action intentions in a non-verbal way, through body posture and movement. We start from this observation to investigate how a robot can decode a human's non-verbal cues during the manipulation of an object, with specific physical properties, to learn the adequate level of "carefulness" to use when handling that object. We construct dynamical models of the human behaviour using a human-to-human handover dataset consisting of 3 different cups with different levels of fillings. We then include these models in the design of an online classifier that identifies the type of action, based on the human wrist movement. We close the loop from action understanding to robot action execution with an adaptive and robust controller based on the learned classifier, and evaluate the entire pipeline on a collaborative task with a 7-DOF manipulator. Our results show that it is possible to correctly understand the "carefulness" behaviour of humans during object manipulation, even in the pick-and-place scenario, which was not part of the training set.} }

@inproceedings{starke2019GraspForces, title={On Force Synergies in Human Grasping Behavior}, author={Julia Starke, Konstantinos Chatzilygeroudis, Aude Billard, and Tamim Asfour}, year={2019}, booktitle={International Conference on Humanoid Robots}, url={./files/ICHR19_0047_FI.pdf}, abstract={The human hand is a versatile and complex system with dexterous manipulation capabilities. For the transfer of human grasping capabilities to humanoid robotic and prosthetic hands, an understanding of the dynamic characteristics of grasp motions is fundamental. Although the analysis of grasp synergies, especially for kinematic hand postures, is a very active field of research, the description and transfer of grasp forces is still a challenging task. In this work, we introduce a novel representation of grasp synergies in the force space, so-called force synergies, which describe forces applied at contact locations in a low-dimensional space and are inspired by the correlations between grasp forces in fingers and palm. To evaluate this novel representation, we conduct a human grasping study with eight subjects performing handover and tool use tasks on 14 objects with varying content and weight using 16 different grasp types. We capture contact forces at 18 locations within the hand together with the joint angle values of a data glove with 22 degrees of freedom. We identify correlations between contact forces and derive force synergies using dimensionality reduction techniques, which allow us to represent the grasp forces applied during grasping with only eight parameters.} }

@inproceedings{kaushik2018multi, title={Multi-objective Model-based Policy Search for Data-efficient Learning with Sparse Rewards}, author={Rituraj Kaushik, Konstantinos Chatzilygeroudis, and Jean-Baptiste Mouret}, year={2018}, booktitle={Conference on Robot Learning (CoRL)}, url={https://arxiv.org/abs/1806.09351}, video={https://www.youtube.com/watch?v=XOBWq7mkYho}, code={https://github.com/resibots/kaushik_2018_multi-dex}, abstract={The most data-efficient algorithms for reinforcement learning in robotics are model-based policy search algorithms, which alternate between learning a dynamical model of the robot and optimizing a policy to maximize the expected return given the model and its uncertainties. However, the current algorithms lack an effective exploration strategy to deal with sparse or misleading reward scenarios: if they do not experience any state with a positive reward during the initial random exploration, it is very unlikely to solve the problem. Here, we propose a novel model-based policy search algorithm, Multi-DEX, that leverages a learned dynamical model to efficiently explore the task space and solve tasks with sparse rewards in a few episodes. To achieve this, we frame the policy search problem as a multi-objective, model-based policy optimization problem with three objectives: (1) generate maximally novel state trajectories, (2) maximize the expected return and (3) keep the system in state-space regions for which the model is as accurate as possible. We then optimize these objectives using a Pareto-based multi-objective optimization algorithm. The experiments show that Multi-DEX is able to solve sparse reward scenarios (with a simulated robotic arm) in much lower interaction time than VIME, TRPO, GEP-PG, CMA-ES and Black-DROPS.} }

@inproceedings{chatzilygeroudis2018using, title={Using Parameterized Black-Box Priors to Scale Up Model-Based Policy Search for Robotics}, author={Konstantinos Chatzilygeroudis and Jean-Baptiste Mouret}, year={2018}, booktitle={International Conference on Robotics and Automation (ICRA)}, url={https://arxiv.org/abs/1709.06917}, video={https://youtu.be/HFkZkhGGzTo}, code={https://github.com/resibots/blackdrops}, abstract={The most data-efficient algorithms for reinforcement learning in robotics are model-based policy search algorithms, which alternate between learning a dynamical model of the robot and optimizing a policy to maximize the expected return given the model and its uncertainties. Among the few proposed approaches, the recently introduced Black-DROPS algorithm exploits a black-box optimization algorithm to achieve both high data-efficiency and good computation times when several cores are used; nevertheless, like all model-based policy search approaches, Black-DROPS does not scale to high dimensional state/action spaces. In this paper, we introduce a new model learning procedure in Black-DROPS that leverages parameterized black-box priors to (1) scale up to high-dimensional systems, and (2) be robust to large inaccuracies of the prior information. We demonstrate the effectiveness of our approach with the "pendubot" swing-up task in simulation and with a physical hexapod robot (48D state space, 18D action space) that has to walk forward as fast as possible. The results show that our new algorithm is more data-efficient than previous model-based policy search algorithms (with and without priors) and that it can allow a physical 6-legged robot to learn new gaits in only 16 to 30 seconds of interaction time.} }

@inproceedings{pautrat2018bayesian, title={Bayesian Optimization with Automatic Prior Selection for Data-Efficient Direct Policy Search}, author={Rémi Pautrat, Konstantinos Chatzilygeroudis, and Jean-Baptiste Mouret}, year={2018}, booktitle={International Conference on Robotics and Automation (ICRA)}, note={A short version of the paper was accepted at the non-archival track of the 1st Conference on Robot Learning (CoRL) 2017}, url={https://arxiv.org/abs/1709.06919}, video={https://youtu.be/xo8mUIZTvNE}, code={https://github.com/resibots/pautrat_2018_mlei}, abstract={One of the most interesting features of Bayesian optimization for direct policy search is that it can leverage priors (e.g., from simulation or from previous tasks) to accelerate learning on a robot. In this paper, we are interested in situations for which several priors exist but we do not know in advance which one fits best the current situation. We tackle this problem by introducing a novel acquisition function, called Most Likely Expected Improvement (MLEI), that combines the likelihood of the priors and the expected improvement. We evaluate this new acquisition function on a transfer learning task for a 5-DOF planar arm and on a possibly damaged, 6-legged robot that has to learn to walk on flat ground and on stairs, with priors corresponding to different stairs and different kinds of damages. Our results show that MLEI effectively identifies and exploits the priors, even when there is no obvious match between the current situations and the priors.} }

@inproceedings{paul2018aloq, title={{Alternating Optimisation and Quadrature for Robust Control}}, author={Supratik Paul, Konstantinos Chatzilygeroudis, Kamil Ciosek, Jean-Baptiste Mouret, Michael A. Osborne, and Shimon Whiteson}, year={2018}, booktitle={Thirty-Second AAAI Conference on Artificial Intelligence (AAAI)}, organization={AAAI}, url={http://www.cs.ox.ac.uk/people/shimon.whiteson/pubs/paulaaai18.pdf}, abstract={Bayesian optimisation has been successfully applied to a variety of reinforcement learning problems. However, the traditional approach for learning optimal policies in simulators does not utilise the opportunity to improve learning by adjusting certain environment variables: state features that are unobservable and randomly determined by the environment in a physical setting but are controllable in a simulator. This paper considers the problem of finding a robust policy while taking into account the impact of environment variables. We present Alternating Optimisation and Quadrature (ALOQ), which uses Bayesian optimisation and Bayesian quadrature to address such settings. ALOQ is robust to the presence of significant rare events, which may not be observable under random sampling, but play a substantial role in determining the optimal policy. Experimental results across different domains show that ALOQ can learn more efficiently and robustly than existing methods.} }

@inproceedings{chatzilygeroudis2017black, title={{Black-Box Data-efficient Policy Search for Robotics}}, author={Konstantinos Chatzilygeroudis, Roberto Rama, Rituraj Kaushik, Dorian Goepp, Vassilis Vassiliades, and Jean-Baptiste Mouret}, year={2017}, booktitle={International Conference on Intelligent Robots and Systems (IROS)}, organization={IEEE}, url={https://arxiv.org/abs/1703.07261}, video={https://www.youtube.com/watch?v=kTEyYiIFGPM}, code={https://github.com/resibots/blackdrops}, abstract={The most data-efficient algorithms for reinforcement learning (RL) in robotics are based on uncertain dynamical models: after each episode, they first learn a dynamical model of the robot, then they use an optimization algorithm to find a policy that maximizes the expected return given the model and its uncertainties. It is often believed that this optimization can be tractable only if analytical, gradient-based algorithms are used; however, these algorithms require using specific families of reward functions and policies, which greatly limits the flexibility of the overall approach. In this paper, we introduce a novel model-based RL algorithm, called Black-DROPS (Black-box Data-efficient RObot Policy Search) that: (1) does not impose any constraint on the reward function or the policy (they are treated as black-boxes), (2) is as data-efficient as the state-of-the-art algorithm for data-efficient RL in robotics, and (3) is as fast (or faster) than analytical approaches when several cores are available. The key idea is to replace the gradient-based optimization algorithm with a parallel, black-box algorithm that takes into account the model uncertainties. We demonstrate the performance of our new algorithm on two standard control benchmark problems (in simulation) and a low-cost robotic manipulator (with a real robot).} }

@inproceedings{koustoumpardis2015human, title={Human robot collaboration for folding fabrics based on force/RGB-D feedback}, author={Panagiotis Koustoumpardis, Konstantinos Chatzilygeroudis, Aris Synodinos, and Nikos Aspragathos}, year={2015}, booktitle={24th International Conference on Robotics in Alpe-Adria-Danube Region (RAAD)}, organization={IEEE}, url={http://dx.doi.org/10.1007/978-3-319-21290-6_24}, abstract={In this paper, the human-robot collaboration for executing complicated handling tasks for folding non-rigid objects is investigated. A hierarchical control system is developed for the co-manipulation task of folding sheets like fabrics/cloths. The system is based on force and RGB-D feedback in both the higher and lower control levels of the process. At the higher level, the perception of the human's intention is used to decide the robot's action; at the lower level, the robot reacts to the force/RGB-D feedback to follow the human's guidance. The proposed approach is tested in folding a rectangular piece of fabric. Experiments showed that the developed robotic system is able to track the human's movement in order to help her/him to accomplish the folding co-manipulation task.} }
Peer-reviewed Workshop Papers

@inproceedings{totsila2023end, title={End-to-End Stable Imitation Learning via Autonomous Neural Dynamic Policies}, author={Dionis Totsila*, Konstantinos Chatzilygeroudis*, Denis Hadjivelichkov, Valerio Modugno, Ioannis Hatzilygeroudis, and Dimitrios Kanoulas}, year={2023}, booktitle={Life-Long Learning with Human Help (L3H2) Workshop, at ICRA}, url={https://arxiv.org/abs/2305.12886} }

@inproceedings{mayr2022combining, title={Combining Planning, Reasoning and Reinforcement Learning to solve Industrial Robot Tasks}, author={Matthias Mayr, Faseeh Ahmad, Konstantinos Chatzilygeroudis, Luigi Nardi, and Volker Krueger}, year={2022}, booktitle={2nd Workshop on Trends and Advances in Machine Learning and Automated Reasoning for Intelligent Robots and Systems, at IROS}, url={https://arxiv.org/abs/2212.03570} }

@inproceedings{mayr2022set, title={How to Set Up & Learn New Robot Tasks with Explainable Behaviors?}, author={Matthias Mayr, Faseeh Ahmad, Konstantinos Chatzilygeroudis, Luigi Nardi, and Volker Krueger}, year={2022}, booktitle={European Robotics Forum}, url={https://portal.research.lu.se/en/publications/how-to-set-up-amp-learn-new-robot-tasks-with-explainable-behavior} }

@inproceedings{mouret201720, title={20 Years of Reality Gap: a few Thoughts about Simulators in Evolutionary Robotics}, author={Jean-Baptiste Mouret and Konstantinos Chatzilygeroudis}, year={2017}, booktitle={Proceedings of the International Workshop "Simulation in Evolutionary Robotics" at the Genetic and Evolutionary Computation Conference (GECCO)}, url={https://hal.inria.fr/hal-01518764/}, abstract={Simulators in Evolutionary Robotics (ER) are often considered as a "temporary evil" until experiments can be conducted on real robots. Yet, after more than 20 years of ER, most experiments still happen in simulation and nothing suggests that this situation will change in the next few years. In this short paper, we describe the requirements of ER from simulators, what we tried, and how we successfully crossed the "reality gap" in many experiments. We argue that future simulators need to be able to estimate their confidence when they predict a fitness value, so that behaviors that are not accurately simulated can be avoided.} }

@inproceedings{vassiliades2017comparing, title={Comparing multimodal optimization and illumination}, author={Vassilis Vassiliades, Konstantinos Chatzilygeroudis, and Jean-Baptiste Mouret}, year={2017}, booktitle={Genetic and Evolutionary Computation Conference (GECCO) (Poster-only papers)}, url={https://hal.inria.fr/hal-01518802/}, abstract={Illumination algorithms are a recent addition to the evolutionary computation toolbox that allows the generation of many diverse and high-performing solutions in a single run. Nevertheless, traditional multimodal optimization algorithms also search for diverse and high-performing solutions: could some multimodal optimization algorithms be better at illumination than illumination algorithms? In this study, we compare two illumination algorithms (Novelty Search with Local Competition (NSLC), MAP-Elites) with two multimodal optimization ones (Clearing, Restricted Tournament Selection) in a maze navigation task. The results show that Clearing can have comparable performance to MAP-Elites and NSLC.} }

@inproceedings{vassiliades2017comparison, title={A comparison of illumination algorithms in unbounded spaces}, author={Vassilis Vassiliades, Konstantinos Chatzilygeroudis, and Jean-Baptiste Mouret}, year={2017}, booktitle={Proceedings of the International Workshop "Measuring and Promoting Diversity in Evolutionary Algorithms" at the Genetic and Evolutionary Computation Conference (GECCO)}, url={https://hal.inria.fr/hal-01518814/}, abstract={Illumination algorithms are a new class of evolutionary algorithms capable of producing large archives of diverse and high-performing solutions. Examples of such algorithms include Novelty Search with Local Competition (NSLC), the Multi-dimensional Archive of Phenotypic Elites (MAP-Elites) and the newly introduced Centroidal Voronoi Tessellation (CVT) MAP-Elites. While NSLC can be used in unbounded behavioral spaces, MAP-Elites and CVT-MAP-Elites require the user to manually specify the bounds. In this study, we introduce variants of these algorithms that expand their bounds based on the discovered solutions. In addition, we introduce a novel algorithm called "Cluster-Elites" that can adapt its bounds to non-convex spaces. We compare all algorithms in a maze navigation problem and illustrate that Cluster-Elites and the expansive variants of MAP-Elites and CVT-MAP-Elites have comparable or better performance than NSLC, MAP-Elites and CVT-MAP-Elites.} }

@inproceedings{papaspyros2016safety, title={Safety-Aware Robot Damage Recovery Using Constrained Bayesian Optimization and Simulated Priors}, author={Vaios Papaspyros, Konstantinos Chatzilygeroudis, Vassilis Vassiliades, and Jean-Baptiste Mouret}, year={2016}, booktitle={BayesOpt '16: Proceedings of the International Workshop "Bayesian Optimization: Black-box Optimization and Beyond" at NIPS}, url={https://arxiv.org/abs/1611.09419}, video={https://www.youtube.com/watch?v=8esrj-7WhsQ}, abstract={The recently introduced Intelligent Trial-and-Error (IT&E) algorithm showed that robots can adapt to damage in a matter of a few trials. The success of this algorithm relies on two components: prior knowledge acquired through simulation with an intact robot, and Bayesian optimization (BO) that operates on-line, on the damaged robot. While IT&E leads to fast damage recovery, it does not incorporate any safety constraints that prevent the robot from attempting harmful behaviors. In this work, we address this limitation by replacing the BO component with a constrained BO procedure. We evaluate our approach on a simulated damaged humanoid robot that needs to crawl as fast as possible, while performing as few unsafe trials as possible. We compare our new "safety-aware IT&E" algorithm to IT&E and a multi-objective version of IT&E in which the safety constraints are treated as separate objectives. Our results show that our algorithm outperforms the other approaches, both in crawling speed within the safe regions and in the number of unsafe trials.} }

@inproceedings{chatzilygeroudis2016semi-episodic, title={Towards semi-episodic learning for robot damage recovery}, author={Konstantinos Chatzilygeroudis, Antoine Cully, and Jean-Baptiste Mouret}, year={2016}, booktitle={AILTA '16: Proceedings of the International Workshop "AI for Long-term Autonomy" at ICRA}, organization={IEEE}, url={https://arxiv.org/abs/1610.01407}, video={https://www.youtube.com/watch?v=Gpf5h07pJFA}, abstract={The recently introduced Intelligent Trial and Error algorithm (IT&E) enables robots to creatively adapt to damage in a matter of minutes by combining an off-line evolutionary algorithm and an on-line learning algorithm based on Bayesian Optimization. We extend the IT&E algorithm to allow for robots to learn to compensate for damages while executing their task(s). This leads to a semi-episodic learning scheme that increases the robot's life-time autonomy and adaptivity. Preliminary experiments on a toy simulation and a 6-legged robot locomotion task show promising results.} }
*Equal contribution