@inproceedings{ayab93, author = {B. el Ayeb and Shengrui Wang}, year = {1993}, booktitle = {Proceedings of the Thirteenth International Joint Conference on Artificial Intelligence}, title = {Computing Effect-to-Cause/Cause-to-Effect Diagnoses within {NdL}}, pages = {1332--1338}, annote = {This paper presents a neural network design which models a cause-to-effect reasoning process. The claim of this paper is that the network finds the simplest and most probable (most supported by the evidence) cause for each effect. This claim is supported by results from the analysis of a simplified partial causal model of a car. The usefulness of this system is limited due to its basic assumption that there is only one actual cause for each effect and that it cannot handle sensor data directly. Instead it deals with symbolic models of identifiable causes and effects and therefore could be used for offline refinement and validation of causal models. Initial values are set based on existing beliefs of the dependency between nodes; these values must represent a good guess or the network will not converge. Causes with the same effect compete with each other until only one remains, where the one with the highest initial value is favored. This technique is not unique but has advantages over similar neural networks developed in the past. The additive updating rule and temporal adaptation of connection weights both serve to optimize the network's performance. No specific limitations of this system are mentioned, though studying different classes of diagnosis problems and more distributed solutions are discussed as avenues for future work.} } @inproceedings{console99, author = {L. Console and O. 
Dressler}, year = {1999}, booktitle = {Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence}, title = {Model-Based Diagnosis in the Real World: Lessons Learned and Challenges Remaining}, pages = {1393--1400}, annote = {The purpose of this paper is to review the state of model based diagnosis (MBD) as an applied field of AI. The principal claim of this paper is that the MBD field has reached a level of maturity which makes it suitable for real-world applications. This claim is supported by evidence pointing to the maturity of the field. It cites several successful applications of this technique including a detailed look at its application in the automotive industry. The usefulness of this analysis lies in an important conclusion drawn which is the following: the ability to diagnose a system would be greatly enhanced if the requirements of a diagnosis system are considered in the design of a device, instead of afterwards. It also serves as a good source of information for the state of the field.} } @inproceedings{darwiche99, author = {A. Darwiche}, year = {1999}, booktitle = {Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence}, title = {Utilizing Device Behavior in Structure-Based Diagnosis}, pages = {1096--1101}, annote = {This paper presents a formal study of how other factors, such as device behavior, can be used to improve purely structural diagnosis techniques. The claim of this paper is that device behavior can be used to reduce the complexity of diagnosis of devices which are too well connected to be efficiently handled by purely structural based techniques. This claim is supported by a series of simulated experiments which showed significant improvements in the number of steps taken by the system to arrive at a diagnosis. These experiments used randomly generated devices consisting of only buffers and ``and/or'' gates where each device could have at most one fault. 
This technique is useful only for improving existing structural based diagnosis systems. Since the process used to actually find the fault is treated as a black box in this study, it does not present any new practical methods for performing diagnosis. The paper does mention that the work presented is preliminary and that there are limitations to the algorithms presented. The main problem is that it does not take full advantage of behavioral information.} } @inproceedings{deuker98, author = {B. Deuker and M. Perrier and B. Amy}, year = {1998}, booktitle = {IEEE Oceanic Engineering Society. OCEANS'98. Conference Proceedings}, title = {Fault-Diagnosis of Subsea Robots Using Neuro-Symbolic Hybrid Systems}, pages = {830--834}, annote = {This paper presents a neuro-symbolic hybrid system for diagnosis of faults in unmanned underwater vehicles. The principal claim of this paper is that this system can learn from failures not modeled in its initial set of rules. Two less important claims compare the hybrid system to a purely neural network based approach. The advantages mentioned are that it is easier to translate the diagnosis process into a form that the operator can understand and that fewer examples are required to train the system. This claim is supported by two simulated examples, both conducted using a simulation of an experimental underwater robot. The first example involves a diagnosis already modeled in the initial rules, whereas the second shows the system's ability to learn from a new failure. This technique is useful where a rigorous model of the normal functioning of the robot is available for fault detection and there is ample time to train the system. This appears to be the first time a system like this has been used for diagnosis of faults for robots. The paper does not provide a conclusion or discussion of the work and therefore does not mention any limitations in the technique described.} } @inproceedings{djath00, author = {K. Djath and M. Dufaut and D. 
Wolf}, year = {2000}, booktitle = {Proceedings of the IEEE Intelligent Vehicles Symposium 2000}, title = {Mobile Robot Multisensor Reconfiguration}, pages = {110--115}, annote = {The purpose of this paper is to present a control system for a mobile robot with multiple sensors which are physically or logically redundant. The principal claim is that this system is designed to take full advantage of that redundancy including fault tolerance. No results or data of any kind are presented to support this claim. Faults are detected in two ways. First, if the sensor gives a reading outside of the possible range of predicted readings, for example the compass indicates that the robot has turned faster than it is physically capable of turning. Second, if the sensor's state (faulty or not faulty determined using the first method) does not fit with the state of the other sensors then that sensor is considered to be faulty. Once a fault is detected the control logic is modified to accommodate the change. This technique seems to concentrate more on using the state of the sensors (faulty or not faulty) to help determine the environment of the robot than on developing a robust determination of that state, which severely limits its applicability to diagnosis problems. Therefore this paper presents a novel method for robot localization based on sensor faults but does not present any new techniques for detecting such faults. The paper does not mention any limitations of the technique.} } @inproceedings{feret94, author = {M. Feret and J. Glasgow}, year = {1994}, booktitle = {Proceedings of the Twelfth National Conference on Artificial Intelligence}, title = {Experience-Aided Diagnosis for Complex Devices}, pages = {29--35}, annote = {This paper presents a formal definition of Experience-Aided Diagnosis (EAD). The principal claim of this paper is that case-based reasoning can be integrated into model-based diagnosis systems which would allow them to learn from their own mistakes. 
This claim is supported by experimental results included in another paper by the authors which includes implementations of this model with results. Model-based reasoning requires at least a partial causal model of the system. Weaknesses in the model are covered by case-based reasoning; here the diagnosis system will save any cases where the model produced an incorrect diagnosis. In this manner the system learns information which is either missing from the model or modeled incorrectly. The paper does not explicitly cover how to combine the two methods, stating that this part of the process is domain dependent. This technique is useful as an enhancement to purely model-based methods. This technique takes a slightly different approach from other systems which combine case-based and model-based reasoning. Here case-based reasoning is used to allow an otherwise static model-based diagnosis system to learn from its mistakes where other systems have used this technique to describe known diagnosis solutions. Whether or not this difference in approach has an impact on the design of a diagnosis system depends on how the two reasoning systems are combined. The paper does not mention any limitations of this technique. Some obvious improvements would be adding the capability to update the incorrect or incomplete model and saving the solutions to frequent faults in the case-based module which is inherently faster.} } @inproceedings{goldberg01, author = {K. Goldberg and B. Chen}, year = {2001}, booktitle = {Proceedings 2001 IEEE/RSJ International Conference on Intelligent Robots and Systems. Expanding the Societal Role of Robotics in the Next Millennium}, title = {Collaborative Control of Robot Motion: Robustness to Error}, pages = {655--660}, annote = {This paper develops a formal model of collaborative control systems where sources are modeled as finite automata. 
The claim of this paper is that the formal analysis supports numerous reports which suggest that such systems are highly fault tolerant. This claim is supported by experimental data from a simulator consisting of 100 sources trying to trace a circle. It clearly shows that performance is actually improved when sources send no signal or the signal is inverted. In the next section these results are mathematically proven to be correct. Sources in this system could be multiple sensors, multiple control processes, or multiple human operators. Sources are evenly divided into two groups, one providing a delta (-1, 0, or 1) in the x direction, the other group providing a delta in the y direction. The control signal is calculated by averaging the signals from each of the sources. This analysis is useful in that it provides a framework with which to formally analyze collaborative control systems. This appears to be the first time that this type of formal framework has been applied to this problem. The paper does point out the weakness that only the path error metric was used to evaluate the robustness of collaborative control systems. It suggests for future work adding time-to-completion metrics into the evaluation process.} } @inproceedings{helfman98, author = {R. Helfman and E. Baur and J. Dumer and T. Hanratty and H. Ingham}, year = {1998}, booktitle = {Proceedings of the Fifteenth National Conference on Artificial Intelligence}, title = {Turbine Engine Diagnostics ({TED}): An Expert Diagnostic System for the {M1 Abrams} Turbine Engine}, pages = {1032--1038}, annote = {The purpose of this paper is to present an expert diagnostic system which has actually been fielded by the US Army. It provides a high level description of Turbine Engine Diagnostics (TED) and discusses why this application was successful. 
The principal claim of this paper is that the success of this system is due to the high level of interaction between the development team and the subject matter experts throughout the design process. This claim is supported by field tests in which all 30 soldiers demonstrated a 96\% increase in their ability to diagnose the problem after an hour-long training session. The field tests also showed that the system could improve the ability of both expert and novice mechanics. Though little detail is provided in this paper on the system itself, there is plenty of evidence that this system was useful and that its design can be used for similar field diagnosis systems. One possible limit to its applicability is that it seems to be designed solely as a mechanic's aid; no interfaces between the engine itself and the system are described. TED was the first fielded maintenance system in the field of AI and addresses many of the user interface issues overlooked in similar projects. No specific limitations to the system were mentioned in the paper.} } @inproceedings{hung99, author = {E. Hung and F. Zhao}, year = {1999}, booktitle = {Proceedings of the 2nd International Conference on Information Fusion (Fusion 99)}, title = {Diagnostic Information Processing for Sensor-Rich Distributed Systems}, annote = {This paper presents a diagnostic system which uses a combination of existing signal processing and reasoning techniques. The principal claim of this paper is that this new technique will improve fault detection and analysis for systems with large amounts of data coming in from similar and/or distinct sets of sensors. This claim is supported by lab experiments done on the drive plate subsystem of a printer with several different faults introduced into the system. The results presented, from two different configurations of the system, show that the technique reduces the problem of detection and diagnosis to a simple thresholding operation. 
Two methods are mentioned for analyzing the raw data signal: short-time Fourier transform and wavelet analysis. Principal Component Analysis (PCA) is used to reduce the dimensionality without losing the most important information from these analyses. Then a composite feature is created by finding the product of all the feature spaces of the individual sensors. Bayesian decision theory is then used to combine data over time in the composite feature space. The conditional probability density function for each hypothesis is determined from the training data. This technique is useful in that it uses a combination of proven methods to develop a diagnosis system which is general and works well though some training is required. This technique is unique in its use of a combination of techniques from several disciplines into one diagnosis system. No specific limitations to the system were mentioned in the paper.} } @inproceedings{ishida97, author = {Y. Ishida}, year = {1997}, booktitle = {Proceedings of the Fifteenth International Joint Conference on Artificial Intelligence}, title = {Active Diagnosis by Self-Organization: An Approach by the Immune Network Metaphor}, pages = {1084--1089}, annote = {This paper presents a distributed diagnostics system modeled on the human immune system. The claim of this paper is that this system provides better fault tolerance through constant monitoring of the system. Though several examples are given, the paper does not mention any experimental results which could help to support this claim. In this system each sensor has its own agent which is constantly comparing that sensor's outputs with others in the same network. Each agent acts independently, communicating dynamically and in parallel with the other agents. Built-in models also allow the agents to compare readings against a knowledge base of normal behavior. The agents also have other built-in models which describe their relationship to other agents from different types of sensors. 
These networks therefore can be built for a set of sensors which provide redundant information even if the set of sensors involved is not homogeneous. This technique is useful in that it is modular and therefore more robust to failure in the diagnosis system itself, but its high overhead makes it impractical for systems with limited resources. The paper claims that the success of this system where others have failed is due to the propagation of information throughout the network. Previous attempts used local voting schemes to isolate problems. No specific limitations to the system were mentioned in the paper.} } @inproceedings{kleer95, author = {J. de Kleer and O. Raiman}, year = {1995}, booktitle = {Proceedings of the Fourteenth International Joint Conference on Artificial Intelligence}, title = {Trading Off the Costs of Inference vs. Probing in Diagnosis}, pages = {1736--1741}, annote = {This paper presents an extension to an active diagnostic system which considers the cost trade-off between probing and finding the next best hypothesis to test. The principal claim of this paper is that the resulting optimization reduces the ``cost'' of diagnosis, where the relative cost of probing versus computation can be adapted to fit the requirements of the application domain. This claim is supported by experimental results of a simulated test suite which were evaluated based on the computed cost and the number of probes required to find the correct diagnosis. The new system shows significant improvements over the older system. The usefulness of this enhancement is limited to diagnosis problems where probing is possible and the required model is available or can be created. The system uses a model of the device based on constraints and an assumption-based truth maintenance system to generate hypotheses. 
The usefulness of this technique is further reduced by the fact that all probes are assigned the same cost, though it is not difficult to see how varying costs could be added. This approach to minimizing the ``cost'' of diagnosis is unique in that it takes into account not only the speed of computation but also the cost of probing and is flexible enough to be applied to applications with very different priorities as regards these metrics. Quite a few limitations to this system are mentioned which shows that more work in this area is needed.} } @article{krishnamurthi92, author = {M. Krishnamurthi and D. T. Phillips}, year = {1992}, journal = {Computers and Industrial Engineering}, title = {An Expert System Framework for Machine Fault Diagnosis}, pages = {67--84}, volume = {22}, number = {1}, annote = {The purpose of this paper is to present a system which generates expert systems for machine fault diagnosis. The claim of this paper is that the system will reduce the development time required for such systems by supplying a general framework from which a new expert system can be built. The development of the system is based on studies which showed that the same basic processes and data structures are used by technicians to diagnose many different classes of machine. The development also took into account the fact that the required user interaction for this type of system is also generalizable. The paper stated these claims are supported by a large number of diagnostic scenarios for the Cincinnati Milacron 786 robot which were tested and validated by a human expert. Unfortunately no data was presented aside from the fact that the system only required two weeks to be developed. The usefulness of this system lies in its use of several built-in diagnosis modules which consist of a shallow reasoning module (failures with known solutions), a deep reasoning module (new failures), and a learning component which records the results of the deep reasoning module. 
Another advantage is that the device can be defined at any level of abstraction of the real system. The system can be further enhanced by including preference information in the test and treatment data which will help the system to determine the least expensive or time consuming paths which will lead to a diagnosis. This work is unique in that it draws on the general diagnosis strategies of human experts to create a framework for a system which can be used to diagnose a variety of devices simply by supplying the required information for that device. Though the paper did acknowledge that the system will probably need further refinements in the future, no specific limitations to the system were mentioned in the paper.} } @inproceedings{lamine00, author = {K. B. Lamine and F. Kabanza}, year = {2000}, booktitle = {Proceedings 12th IEEE International Conference on Tools with Artificial Intelligence}, title = {History Checking of Temporal Fuzzy Logic Formulas for Monitoring Behavior-Based Mobile Robots}, pages = {312--319}, annote = {This paper presents a monitoring system based on temporal fuzzy logic for use with behavior-based robots (also discussed in the authors' previous work). The general approach taken in the system is built upon a temporal logic system developed for monitoring distributed real-time systems. The claim of this paper is that incorporating fuzzy semantics into a similar system will make it suitable for a robot working in an open world. This claim is supported by running the program through three sets of tests in a real-world environment; the first set of tests served as a baseline. On the second set of tests the system detected 22 failures which did occur. The third set of tests included the noise filtering which resulted in 25\% fewer failures, all of which were detected. This technique is useful because it provides a flexible framework in which to define faults in the system including planning and navigation problems. 
It is also more robust in that the truth value of a proposition is based on several snapshots over a set period of time, which is determined empirically. An ordered weighted average (OWA) operator is used to evaluate the truth value over time. In the noise elimination section classical noise removal filters for salt and pepper and Gaussian noise are simply placed in the OWA operator. The system was implemented to collect data and alert an existing robot control program of any detected failures. This technique builds on previous methods by adding fuzzy semantics into a proven diagnostic system in another domain. The paper acknowledges that more research is needed to determine how to implement better models of sensor noise, like adaptive filters.} } @inproceedings{lee01, author = {Choon Fatt Lee and Yong Ping Xu}, year = {2001}, booktitle = {Proceedings of IEEE Region 10 International Conference on Electrical and Electronic Technology. TENCON 2001}, title = {A Multi-Sensor Based Temperature Measuring System with Self-Diagnosis}, pages = {903--906}, annote = {This paper presents a temperature measurement system which uses an array of 36 identical sensors (also discussed in the authors' previous work) and a self-diagnostic algorithm to find faulty sensors in the array. The claim of this paper is that the redundancy can be used to provide a more accurate and robust measurement than that which is supplied by a single sensor. This claim is supported by lab experiments which showed that the sensor array without faults produced accurate readings to within 0.05 degrees C, which was an improvement on a single sensor's one degree accuracy rate. The results also showed that using the self-diagnosis algorithm to remove faulty sensors allowed the system to continue producing accurate readings when up to a sixth of the sensors were malfunctioning. 
This technique may be useful for similar arrays of identical sensors but provides no help in diagnosing more complex, less homogeneous systems. The ``majority rules'' technique used here for fault detection is common in agreement based diagnosis systems though the use of homogeneous sensors allows it to be simpler and therefore more efficient and robust than other techniques. No specific limitations to the system were mentioned in the paper.} } @inproceedings{lerner00, author = {U. Lerner and R. Parr and D. Koller and G. Biswas}, year = {2000}, booktitle = {Proceedings Seventeenth National Conference on Artificial Intelligence AAAI-2000}, title = {Bayesian Fault Detection and Diagnosis in Dynamic Systems}, pages = {531--537}, annote = {This paper presents a fault detection system for complex dynamic systems. They present a novel method grounded in a combination of existing techniques, namely temporal causal graphs (TCGs) and Kalman filters. The claim of this paper is that, using these techniques with dynamic Bayesian networks, an accurate model of the state of the system can be derived from only a few sensor readings. This claim is supported by experimental data gathered from a model of a system which contains five water tanks and only three measurement points. Using only the measurements the diagnosis system was able to accurately detect faults in the tank system. This technique is useful where a rigorous model of the target system can be built but few sensor readings are available. The applicability of this system is improved through its use of dynamic Bayesian networks which are derived from a TCG of the system and are expressive enough to capture temporal dependencies as well as discrete and continuous data. A technique similar to the extended Kalman filter is used to estimate the current state of the system as the set of ``beliefs'' that the system is in each state. The entire technique is not novel but builds on several similar systems developed by this group. 
New filtering methods for reducing the number of candidate hypotheses are presented along with the added capability to predict the future state of the system which generates more accurate ``belief'' values for the present state. The limitations of this system are openly acknowledged as avenues for future research including the ability to work with a less constrained model for the dynamic Bayesian networks and testing this technique on real-world diagnosis scenarios.} } @inproceedings{mackey01, author = {R. Mackey and M. James and Han Park and M. Zak}, year = {2001}, booktitle = {2001 IEEE Aerospace Conference Proceedings}, title = {{BEAM}: Technology for Autonomous Self-Analysis}, pages = {2989--3001}, annote = {This paper presents an overview of an extensive architecture for failure prediction, detection, and isolation along with performance metrics. The claim of this paper is that this system can perform real-time fault detection and characterization for highly automated systems with limited resources like deep space probes. This claim is supported by its application in several application domains the results of which are found in 5 papers cited. The usefulness of this system lies in the combination of several methods which can handle many types of input from discrete status variables to real valued sensor data. The data is first filtered to remove any information which is deterministically known from the supplied system model. Analysis is then done on the remaining sensor data in groups (correlation-based) and individually (feature-based). Another component is used to determine if the two analysis components agree on the presence of a fault from a specific source; if so the system has confirmed that the fault exists. This component also looks for any disparities between software execution (symbolic data) and hardware operation (sensor data). Two modules are used to predict problems. 
Yet another component is used to compile all of this information for use by a planning system or human user. This system appears to be novel in its combination of so many methods and its scalable, modular design. No specific limitations to the system were mentioned in the paper.} } @inproceedings{madden99, author = {M. G. M. Madden and P. J. Nolan}, year = {1999}, booktitle = {IEE Proceedings-Control Theory and Applications}, title = {Monitoring and Diagnosis of Multiple Incipient Faults Using Fault Tree Induction}, pages = {204--212}, annote = {The purpose of this paper is to present a system which generates fault trees from databases of manually classified sensor data and then automatically creates a monitoring and fault diagnosis program from the results. The claim of this paper is that this system can automatically generate a diagnosis system from the sensor data along with manually created fault trees which already exist for most devices in commercial use. This claim is supported by experimental sensor data gathered from a simulation of a rigorously tested model of a pneumatic servo-controlled robot arm. The results in general are poor. The paper says this is due to insufficient normal data points. The usefulness of this system is limited by the fact that the success of the diagnosis is heavily dependent upon the presence of similar examples in the training set. This weakness makes this system more useful for the industrial manipulator considered and similar devices working in controlled environments than for systems which need to work in an open world like autonomous robots. The component which generates the fault trees from sensor data is drawn from previous work by the authors. Though several similar systems exist this technique is unique in that it can handle multiple faults and can automatically generate a complete diagnosis shell program in C. 
The paper does acknowledge that the results are poor and that the system is highly dependent on the training data.} } @inproceedings{mcilraith99, author = {S. McIlraith and G. Biswas and D. Clancy and V. Gupta}, year = {1999}, booktitle = {Hybrid Systems and AI: Modeling Analysis and Control of Discrete Plus Continuous Systems. Papers from the 1999 AAAI Symposium}, title = {Towards Diagnosing Hybrid Systems}, pages = {124--131}, annote = {This paper presents continued work by the authors on diagnosing hybrid (discrete and continuous) systems. The principal claim of this paper is that a combined qualitative and quantitative approach is required for modeling complex hybrid systems. This system was not implemented at the time that the paper was printed; therefore it does not present any experimental results to support this claim. The usefulness of this system is limited in two ways: it requires a rigorous model of the target device and it is not expected to work in real time, though the authors' plans for future work address the latter issue. The methods in the paper build on previous work done by the authors in this domain. The paper also acknowledges the limitations of the fault detection system discussed in the paper. Faults are detected simply by significant deviations from their expected values, though more sophisticated methods of failure detection have been developed by this group.} } @inproceedings{narasimham00, author = {S. Narasimhan and F. Zhao and G. Biswas and E. Hung}, year = {2000}, booktitle = {Proceedings of the 4th Symposium on Fault Detection, Supervision and Safety for Technical Processes (Safeprocess 2000)}, title = {Fault Isolation in Hybrid Systems Combining Model Based Diagnosis and Signal Processing}, pages = {512--517}, annote = {This paper mainly covers exactly the same system as in \cite{mcilraith99} with a small contribution from \cite{hung99}. 
The only concept presented in this paper which is not covered in the other two is the notion of model-driven adaptive signal processing. The model considered in \cite{mcilraith99} is expanded to include knowledge of the best form of signal processing to use in order to detect problems in the mode related to that model. While \cite{hung99} designed their signal processing methods for sensor-rich systems, it seems that this solution could also be used in cases where a robot is sensor deprived. For example it does not have a sensor which is designed to provide important information (motor condition) and therefore has to use signal processing on a less direct sensor (audio or camera) in order to detect a problem.} } @inproceedings{perraju97, author = {T. S. Perraju and S. P. Rana and S. P. Sarkar}, year = {1997}, booktitle = {IEEE High-Assurance Systems Engineering Workshop Proceedings}, title = {Specifying Fault Tolerance in Mission Critical Systems}, pages = {24--31}, annote = {This paper presents an extended version of I/O automata specifically designed to model mission critical systems. The claim of this paper is that these extended automata are expressive enough to capture the fault tolerant requirements of these types of systems and can therefore be used to formally study their performance in the presence of faults. This claim is supported by a model created of a fire control system of a combat vehicle. The model was able to capture the fault tolerant requirements of the system. An extended capability of these automata is their ability to capture normal versus fault or recovery actions taken by the system. Another key requirement is that timing issues such as periodic actions and deadlines can also be captured in the new automata. 
The result is a much more expressive automata signature which extends the I/O automata’s signature by adding normal, fault, and recovery classes along with active, inactive, enabled, and disabled states plus lower and upper bounds on the time from when the action is enabled until it is executed. The usefulness of these models will be determined in future work where mathematical expressions will be developed for calculating the performance of these automata in the presence of faults. This appears to be the first time that a new automata has been developed to formally capture the requirements of mission critical systems. No specific limitations to the new automata were mentioned in the paper.} } @inproceedings{reed98, author = {N.E. Reed}, year = {1998}, booktitle = {Proceedings Fifteenth National Conference on Artificial Intelligence AAAI-98}, title = {Constructing the Correct Diagnosis When Symptoms Disappear}, pages = {151--156}, annote = {The purpose of this paper is to present a diagnosis method that correctly identifies multiple defects, even when those defects interact. The principle claim of this paper is that the algorithm for heuristic solution construction presented in this paper can learn the symptoms for single and multiple defect cases from various types of training data. This claim is supported by tests with data from the medical files of 78 children with heart problems. One-third of these cases served as the knowledge base which left 53 cases for testing. The test results showed that the system performed almost as well as the experts for cases with single, complex, and multiple defects present. Though the domain this system was implemented for was simplistic, only 7 single defects were considered, the approach is general enough to be applied to any domain and does not require a model of the system. Its usefulness is also enhanced by its speed. It was also designed to be faster then previous methods with a worst case run time of 4 minutes. 
This technique is unique in that it concentrates on the effect of interacting defects and that it does not require a model of the target system. Though the need for future work was mentioned, no specific limitations to the technique were mentioned in the paper.} } @inproceedings{rinner99, author = {B. Rinner and B. Kuipers}, year = {1999}, booktitle = {Hybrid Systems and AI: Modeling Analysis and Control of Discrete Plus Continuous Systems. Papers from the 1999 AAAI Symposium}, title = {Monitoring Piecewise Continuous Behaviors by Refining Trackers and Their Models}, pages = {164--169}, annote = {This paper presents a method for monitoring hybrid (continuous and discrete) systems. The principle claim of this paper is that this system can monitor hybrid systems even with incomplete knowledge. This claim is supported by experimental results obtained from a simulated model of a two tank system with and without error. The system is made up of two main modules which act in parallel. One module is focused on tracking the observed data in the context of the current mode. It looks for significant discrepancies between that mode's model and the trends in the observed data while simultaneously refining that model. Significant discrepancies are assumed to be transitions into other known modes or unmodeled fault modes. In the case of known modes the tracker for that mode is then started. The second module tracks multiple hypotheses in parallel where each hypothesis is made up of a set of modes in sequence. These hypotheses therefore make up a high-level model of system behavior either in normal or fault conditions. The system is useful in that only weak models are required to initialize the system and it can handle asynchronous data updates. Unfortunately several important aspects of the system were not clear from the paper such as how the next known mode is found, how it handles novel modes, and whether or not the refined models are sufficiently accurate to monitor a system. 
This technique is unique among hybrid fault tolerant systems in its ability to refine its own model of the system. No specific limitations to the system were mentioned in the paper.} } @inproceedings{rymon93, author = {R. Rymon}, year = {1993}, booktitle = {Proceedings of the Thirteenth International Joint Conference on Artificial Intelligence}, title = {Goal-Directed Diagnosis-Diagnostic Reasoning in Exploratory-Corrective Domains}, pages = {1488--1493}, annote = {This paper presents a system for assisting physicians in the treatment of patients with multiple traumas. The principle claim of this paper is that a goal-directed diagnosis system focuses on the steps required to return the subject (patient) to a normal state and that this focus is more appropriate for the domain of medical diagnosis. This claim is supported by experiments the system was provided with information from 97 real cases. The paper states that a panel of three trauma surgeons blindly preferred the actions recommended by the system to the actions which were actually carried out in most of the cases. The system decides which action should be taken next based on a set of beliefs, attitudes which is a measure of the relevance of information, and a set of rules which encapsulate expert knowledge. Beliefs are formed from observations provided by the physician and anything that can be concluded from the rules while attitudes come from the rules alone. The planning system can also use the rules to determine if the subject has the resources available to perform the actions required and in what order to perform multiple actions. The usefulness of this system can be extended to any diagnostic domain where returning the subject (patient or device) to its normal state is more important then the diagnosis and the subject has limited resources for exploration of the problem and repair. 
Another advantage of this system for some domains is that it relies on expert diagnostic knowledge rather then a model of the system. This system appears to be unique in its goal-directed focus and consideration of the resources required to explore and repair the problem. No specific limitations to the system were mentioned in the paper.} } @inproceedings{sary98, author = {C. Sary and C. Peterson and J. Rowe and T. Ames and K. Mueller and W. Truszkowski and N. Ziyad}, year = {1998}, booktitle = {Multimodal Reasoning. Papers from the 1998 AAAI Symposium}, title = {Trend Analysis for Spacecraft Systems Using Multimodal Reasoning}, pages = {157--162}, annote = {This paper presents a new technique for processing telemetry from spacecraft systems. The principle claim of this paper is that the efficiency, accuracy, and reliability of trend analysis and diagnosis can be improved through multimodal reasoning. This system was not implemented at the time that the paper was printed therefore no experimental results were included to support this claim. This technique augments existing trend analysis methods with model, case, and rule-based reasoning. Faults are detected as deviances from the normal trend for each component or subsystem in the model. Rule-based reasoning is used to diagnose and solve all well known anomalies. For new anomalies case-based reasoning is used to determine if similar anomalies are already modeled. Information about anomalies which have been encountered before but are not well known are modeled using Local Dempster-Shafer theory. The model provides an ordered set of hypotheses to test against the telemetry. A similar process is used to find solutions once the diagnosis is complete. This system is useful in that it is designed to work in real time and has an online learning capability. One limitation to its applicability is the need for a complete model of the target system. 
This technique is unique in its application of multimodal reasoning to trend analysis. No specific limitations to the system were mentioned in the paper.} } @inproceedings{sheldon93, author = {F.T. Sheldon and H. Mei and S.-M. Yang}, year = {1993}, booktitle = {Fourth International Symposium on Software Reliability Engineering Proceedings}, title = {Reliability Prediction of Distributed Embedded Fault-Tolerant Systems}, pages = {92--102}, annote = {The purpose of this paper is to present a new method for analysis of fault tolerant systems using a static task graph technique. The principle claim of this paper is that this new technique provides a formal framework in which to compare fault tolerant systems. This claim is supported by experimental results based on a comparison of three different models for a fault tolerant subsystem of the Simplified Unmanned Vehicle System testbed. Several graphs were presented showing the predicted reliability of the system over varying reliability of its components. The strengths and weaknesses of each model could be clearly determined from these graphs. The usefulness of this technique lies in the graphs used which make it easy to identify and analyze key components of a system design and their dependencies. Formal techniques are used for converting these graphs into a set of probabilities which can be used to compare the relative reliability of the target systems. The analysis highlights the result of imposing more stringent acceptance criteria, which reduces the number of missed faults, on the rate of false alarms. This technique is unique in that no other formal techniques existed at that time for comparing the relative reliability of fault tolerant systems. The paper acknowledges that this system is limited in that it only measures the reliability of the target system in the presence of faults. 
Future work is needed to integrate other metrics like performance and timeliness into similar formal frameworks.} } @inproceedings{soika97, author = {M. Soika}, year = {1997}, booktitle = {Proceedings of the 1997 IEEE/RSJ International Conference on Intelligent Robots and Systems. Innovative Robotics for Real-World Applications}, title = {A Sensor Failure Detection Framework for Autonomous Mobile Robots}, pages = {1735--1740}, annote = {This paper presents a failure detection framework based on probabilistic analysis of correlation between redundant sensor readings. The principle claim of this paper is that this method does not require explicit failure models and therefore is designed to work independently of the type of failure and the robot'’s environment. This claim is supported by an example application of this framework using an occupancy grid representation. The application was tested on an autonomous mobile robot called ROAMER turning in an open environment at 20 degrees per second. A inconsistency grid was shown for two sensors along with the combined occupancy grid. It is not clear from the paper how many of these experiments were carried out or how (if?) the data was evaluated. Conditional probabilities, similar to those often used for sensor fusion, are used to determine the consistency of a sensor reading in relation to the information provided by the other sensors. The usefulness of this system is its inherent flexibility. The system does not rely on models or training, does not care whether the sensors are physically or logically redundant, and only takes into account that redundant sensor readings should agree. This technique is unique in that it does not require any models of the system either in normal or fault operation modes to detect problems. The weaknesses of this system are acknowledged as avenues for future work. 
These include the need for error detection techniques for continuous variables and recovery schemes.} } @inproceedings{stefik93, author = {D.M. Russell and M.J. Stefik and P. Pirolli and S.K. Card}, year = {1993}, booktitle = {Proceedings of INTERCHI '93. Human Factors in Computing Systems}, title = {The cost structure of sensemaking}, pages = {269--276}, annote = {This paper presents an analysis of the cost structure for sensemaking tasks. Sensemaking is defined as the process of searching for a representation and encoding data in that representation to answer task-specific questions. The principle claim is that the process of making sense of a complex body of information always follows a common pattern made up of similar operations and cyclic processes. This claim is supported by flow diagrams for four unrelated sensemaking tasks each of which followed the same basic pattern. Sensemaking tasks are ubiquitous and make up most of the tasks performed on computers today. The usefulness of this analysis therefore lies in a deeper understanding of the common elements of these types of tasks and the subsequent improvement of interfaces for human and automated agents based on that knowledge. A deeper understanding of the cost structure of sensemaking can also lead to better defined scopes of such projects. This analysis is unique in that it concentrates on the cost structure associated with representation shifts and use. The paper acknowledges that this analysis presents an incomplete picture of the sensemaking process and that additional research is needed to uncover more subtle interactions between the steps involved in this process.} } @inproceedings{stuck95, author = {E.R. Stuck}, year = {1995}, booktitle = {Proceedings of the 1995 IEEE/RSJ International Conference on Intelligent Robots and Systems. 
Human Robot Interaction and Cooperative Robots}, title = {Detecting and Diagnosing Navigational Mistakes}, pages = {41--46}, annote = {The purpose of this paper is to present a system for detecting navigational mistakes made by mobile robots in open environments. The principle claim of this paper is that this technique can detect global mistakes which lead the robot down incorrect paths as opposed to simple local errors. It also places more emphasis on visual mistakes like misrecognition. This claim is supported by tests performed in a simulated environment. A mistake was correctly detected in all eight experiments. In five of the experiments the robot ranked the actual mistake as the most likely hypothesis. The correct hypothesis was ranked no lower then third in any of the experiments. The usefulness of this technique lies in its ability to detect high level navigational mistakes which lower level diagnosis systems would miss. The system finds mistakes by generating expectations based on a priori information and past results. If the current sensor readings significantly differ from expectations then the “conviction” level of the robot drops below a threshold and the robot stops. At this point it analyzes the data recorded so far and generates hypotheses as to what the mistake was and when it occurred. Heuristics are used to rank the resulting hypotheses. This technique is unique in its focus on high level navigation mistakes and visual perception problems in navigation. The paper acknowledges that the simulated environment allowed them to simplify the vision and motor problems to an extent not possible in a real environment.} } @inproceedings{teije97, author = {A. ten Teije and F. 
van Harmelen}, year = {1997}, booktitle = {Proceedings of the Fifteenth International Joint Conference on Artificial Intelligence}, title = {Exploiting Domain Knowledge for Approximate Diagnosis}, pages = {454--459}, annote = {This paper presents a formal study of approximate diagnosis using subsets of casual models to find the correct hypothesis. The principle claim of this paper is that approximate diagnosis can be performed by incrementally adding elements of a model as needed and that a system of this type will work, in the worst case, as quickly as a diagnosis system which always considers the entire model. This claim is supported using a simplified behavior model of a car as an example, though no experimental results are included in the paper. Four general strategies are presented for resolving the problem of too few or too many hypotheses or to adjust the size of hypotheses. Each of these strategies works by adding positive or negative observations to the subset of the model to be considered. The usefulness of this approach lies in the fact that elements of the model are added incrementally and the system often does not have to consider the entire model in order to find a solution. This technique is unique in that it applies the work of Schaerf and Cadoli on approximate entailment to the problem of diagnosis, thereby developing a formal method for optimizing the diagnosis process. Several limitations are pointed out including the fact that this technique has not been applied to a realistic application and that larger application should be used to study the efficiency of this technique.} } @article{visinsky95, author = {M.L. Visinsky and J.R. Cavallaro and I.D. Walker}, year = {1995}, journal = {IEEE Transactions on Robotics and Automation}, title = {A Dynamic Fault Tolerance Framework for Remote Robots}, pages = {477--490}, volume = {11}, number = {4}, annote = {This paper presents a layered fault tolerance system for robots. 
The principle claim of this paper is that the layered approach can provide different levels of detection and tolerance for structurally diverse robots. This claim is supported by tests which used a simulated model of two different robotic arms. Results showed that the system was able to compensate for simulated failures and complete its task safely. The paper also refers to two other applications in which this system was used which are discussed in other papers. The low and middle layers maintain mathematical models of the dynamics of the system. The lowest layer looks for small discrepancies between the model and the sensor data and is designed to correct any such biases. The middle layer uses a collection of independent tests to monitor sensor data and isolate any errors. If an error is found the sensor or motor involved is replaced by a redundant entity in that subsystem. If the middle layer cannot fix the error it fails up. The top, or supervisor, layer uses an expert system to perform more advanced fault tolerance. It tracks the overall state of the robot and the failure rates of various parts of the robot. It is also aware of alternatives for fault recovery which are not available in the middle layer. This layer periodically checks to see if the current goal is still reachable in the degraded state of the robot. The usefulness of this system is dependent upon its reliance in the low and middle layers on accurate models of the system and its environment. The top layer's ability to track the failure rates of components and the layered approach in general make this technique unique among fault tolerance systems for robots. No specific limitations to the system were mentioned in the paper.} } @inproceedings{vos96, author = {D.W. Vos and B. 
Motazed}, year = {1996}, booktitle = {Proceedings of the SPIE - The International Society for Optical Engineering}, title = {The Application of Fault Tolerant Controls to UAVs}, pages = {69--75}, annote = {The purpose of this paper is to present a fault tolerant control system for autonomous unmanned air or underwater systems with logical redundancies. The principle claim of this paper is that the problem of controlling UAV or UUV'’s which are inherently linear parameter dependant (LPD) systems can be reduced into a problem which has already been solved, namely control of linear time invariant (LTI) systems. This claim is supported by the development of feedback linearization techniques and a coordinate transform which converts LPD systems into a set of coordinates where the parameters do not change over time. This allows the designers of a fault tolerant system to develop one model which can be used to compare to the actual state of the robot over the entire range of its operating envelope. The success of this design is shown in results based on three experiments where the control system was given telemetry data from actual flights of an experimental UAV. In two of those cases faults caused the UAV to crash in the real world whereas the control system was able to recover the vehicle in simulation. In a third the flight did not suffer from any faults and the control system’s commands followed the actual system’s commands exactly. The usefulness of this system is enhanced by the requirement for only one model of the normal operation of the robot through any operational mode, though it is not clear how much more difficult it might be to develop that model. This technique is unique in its formal mathematical approach to reducing the problem of UAV and UUV control to a problem which has already been solved. No specific limitations to the system were mentioned in the paper.} } @inproceedings{washington00, author = {R. 
Washington}, year = {2000}, booktitle = {Proceedings 2000 ICRA. Millennium Conference. IEEE International Conference on Robotics and Automation. Symposia Proceedings}, title = {On-Board Real-Time State and Fault Identification for Rovers}, pages = {1175--1181}, annote = {The purpose of this paper is to present a preliminary attempt to create a fault detection system for rovers. The claim of this paper is that this system is particularly appropriate for robotic rovers which are subject to environmental influences and sensor noise. This claim is supported by experimental tests which were primitive in nature and designed to simply show the need for further development of this system. All the models and parameters needed were determined “by hand” whereas in a mature system they would be based on real data and experience with the platform. Despite this fact the results, taken from 50 different experiments, showed that the system could correctly identify faults in the majority of cases. Also, all of the incorrectly identified faults presented in the paper were short-lived. The usefulness of this system lies in its use of a combination of Markov models and Kalman filters which are established methods for discrete and continuous analysis respectively in this field, as well as its speed. The prototype system was optimized at multiple points leading to an update rate of under a second for all 6 wheels in sequence on a platform comparable to the resources onboard the rovers. One drawback of this system is that it requires complete, reliable models of the target system in normal and fault modes. Though this technique builds on other state based methods it is unique in the inclusion of quantitative analysis using Kalman filters and context specific probabilities on state transitions. 
Aside from the fact that the models and parameters were determined “by hand” the paper also acknowledges that a more sophisticated version of the Kalman filter should be used to produce more accurate results.} }