%% $Header: /users/fgaertner/cvsroot/felix/tex/bibliographies/felix-stabilization.bib,v 1.3 2002/11/26 19:19:20 fgaertner Exp $
%%% Edited by Felix Gaertner <felix at informatik.tu-darmstadt.de>
%%%
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Felix C. Gaertner",
%%%     version         = "see RCS Header", 
%%%     date            = "see RCS Header",
%%%     time            = "see RCS Header",
%%%     filename        = "felix-stabilization.bib",
%%%     address         = "EPFL, I&C, LPD, Switzerland",
%%%     telephone       = "+41-21 693 7501",
%%%     FAX             = "+41 21 693 7570",
%%%     URL             = "http://lpdwww.epfl.ch/fgaertner/",
%%%     checksum        = "XXX",
%%%     email           = "fgaertner at lpdmail.epfl.ch,
%%%                        fcg at acm.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "bibliography, stabilization, fault-tolerance",
%%%     supported       = "no",
%%%     docstring       = "This BibTeX file records books and articles
%%%                        about fault-tolerance, including topics
%%%                        like stabilization, self-stabilization and
%%%                        whatever seems important to me. The annote
%%%                        field contains short content descriptions
%%%                        for my own personal use which might be
%%%                        interesting for others too.  The ISBN
%%%                        fields will be printed if the is-alpha.bst
%%%                        or is-plain.bst style files are used.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by the biblabel software
%%%                        developed for the BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted
%%%                        first by ascending year, and within each
%%%                        year, alphabetically by author or editor,
%%%                        and then, if necessary, by the 3-letter
%%%                        abbreviation at the end of the BibTeX
%%%                        citation tag, using the bibsort -byyear
%%%                        utility.  Year order has been chosen to
%%%                        make it easier to identify the most recent
%%%                        work.
%%%
%%%                        The bibsort utility, and several related
%%%                        programs for bibliography maintenance, are
%%%                        available on ftp.math.utah.edu in
%%%                        /pub/tex/bib, and at other Internet sites
%%%                        which mirror it, including the
%%%                        Comprehensive TeX Archive Network (CTAN);
%%%                        the command `finger ctan<at>pip.shsu.edu'
%%%                        will produce a list of CTAN hosts.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility."
%%%  }
%%% ====================================================================

%%%
%%% Thanks go to:
%%% Nelson Beebe
%%%

%=======================================================================
% Acknowledgement abbreviations:


%=======================================================================
% Institutional abbreviations:

% Institutional abbreviation: Stanford's computer science department.
@string{inst-STAN-CS = {Stanford University, Department of
                        Computer Science}}

%=======================================================================
% Journal abbreviations:


% Journal-name macros.  Reference them unquoted, e.g.  journal = j-CACM.
% The order of the definitions is the same as before; only the layout and
% the value delimiters (braces instead of double quotes) differ.
@string{j-ACM                     = {Journal of the ACM}}
@string{j-ACM-ADALET              = {ACM Ada Letters}}
@string{j-ACM-COMPREV             = {ACM Computing Reviews}}
@string{j-ACM-COMP-SURVEYS        = {ACM Computing Surveys}}
@string{j-APL-QUOTE-QUAD          = {APL Quote Quad}}
@string{j-CACM                    = {Communications of the ACM}}
@string{j-CCCUJ                   = {C/C++ Users Journal}}
@string{j-COMP-J                  = {The Computer Journal}}
@string{j-COMP-LANG-MAG           = {Computer Language Magazine}}
@string{j-COMPUT-STAT-Q           = {Computational Statistics Quarterly}}
@string{j-COMPUTER                = {Computer}}
@string{j-DC                      = {Distributed Computing}}
@string{j-DDJ                     = {Dr. Dobb's Journal of Software Tools}}
@string{j-IEEE-ASSP-MAG           = {IEEE ASSP magazine: a publication of the
                                     IEEE Acoustics, Speech, and Signal Processing
                                     Society}}
@string{j-IEEE-SOFTWARE           = {IEEE Software}}
@string{j-IEEE-COMPUTER           = {IEEE Computer}}
@string{j-IEEE-TRANS-SOFTW-ENG    = {IEEE Transactions on Software Engineering}}
@string{j-IEEE-TRANS-COMP         = {IEEE Transactions on Computers}}
@string{j-IFIP-TRANS-A            = {IFIP Transactions. A. Computer Science and
                                     Technology}}
@string{j-INFO-PROC-SOC-JAPAN     = {Journal of the Information Processing
                                     Society of Japan = Joho Shori}}
@string{j-IPL                     = {Information Processing Letters}}
@string{j-INFORMATIE              = {Informatie}}
@string{j-IS                      = {Informatik Spektrum}}
@string{j-J-COMP-SCI-TECH         = {Journal of Computer Science and Technology}}
@string{j-J-OOP                   = {Journal of Object Oriented Programming}}
@string{j-LINUX-JOURNAL           = {Linux Journal}}
@string{j-RS-MAGAZINE             = {RS\slash Magazine}}
@string{j-SEJ                     = {Software Engineering Journal}}
@string{j-SIGCSE                  = {SIGCSE Bulletin (ACM Special Interest Group
                                     on Computer Science Education)}}
@string{j-SIGPLAN                 = {ACM SIGPLAN Notices}}
@string{j-SOFTWARE-CONCEPTS-TOOLS = {Software --- Concepts and Tools}}
@string{j-SPE                     = {Soft{\-}ware\emdash Prac{\-}tice
                                     and Experience}}
@string{j-STRUCT-PROGRAM          = {Structured Programming}}
@string{j-SUNEXPERT               = {SunExpert}}
@string{j-TEXHAX                  = {{\TeX{}{\-}hax}}}
@string{j-TEXNIQUES               = {{\TeX{}}{\-}niques, Publications for
                                     the {\TeX{}} community}}
@string{j-TOPLAS                  = {ACM Transactions on Programming
                                     Languages and Systems}}
@string{j-TOCS                    = {ACM Transactions on Computer Systems}}
@string{j-TUGBOAT                 = {{\TUB{}}}}

%=======================================================================
% Proceedings abbreviations:

% Booktitle macros for the FTCS symposia (15th, 23rd, 26th-28th) and the
% FTCS-28 FastAbstracts digest.
@String{pro-ftcs85         = {Proceedings of the 15th IEEE Symposium on Fault Tolerant
                              Computing Systems (FTCS-15)}}
@String{pro-ftcs93         = {Proceedings of the 23rd IEEE Symposium on Fault Tolerant
                              Computing Systems (FTCS-23)}}
@String{pro-ftcs96         = {Proceedings of the 26th IEEE Symposium on Fault Tolerant
                              Computing Systems (FTCS-26)}}
@String{pro-ftcs97         = {Proceedings of the 27th IEEE Symposium on Fault Tolerant
                              Computing Systems (FTCS-27)}}
@String{pro-ftcs98         = {Proceedings of the 28th IEEE Symposium on Fault Tolerant
                              Computing Systems (FTCS-28)}}
@String{pro-ftcs98-fastabs = {Digest of FastAbstracts of the 28th IEEE
                              Symposium on Fault Tolerant Computing Systems
                              (FTCS-28)}}

% Booktitle macros for the WDAG workshops, 3rd (1989) through 11th (1997).
@String{pro-wdag89 = {Proceedings of the
                      3rd International Workshop on Distributed Algorithms (WDAG89)}}
@String{pro-wdag90 = {Proceedings of the
                      4th International Workshop on Distributed Algorithms (WDAG90)}}
@String{pro-wdag91 = {Proceedings of the
                      5th International Workshop on Distributed Algorithms (WDAG91)}}
@String{pro-wdag92 = {Proceedings of the
                      6th International Workshop on Distributed Algorithms (WDAG92)}}
@String{pro-wdag93 = {Proceedings of the
                      7th International Workshop on Distributed Algorithms (WDAG93)}}
@String{pro-wdag94 = {Proceedings of the
                      8th International Workshop on Distributed Algorithms (WDAG94)}}
@String{pro-wdag95 = {Proceedings of the
                      9th International Workshop on Distributed Algorithms (WDAG95)}}
@String{pro-wdag96 = {Proceedings of the
                      10th International Workshop on Distributed Algorithms (WDAG96)}}
@String{pro-wdag97 = {Proceedings of the
                      11th International Workshop on Distributed Algorithms (WDAG97)}}

% Booktitle macros for the PODC symposia: 3rd (1984) and 9th-15th (1990-1996).
@String{pro-podc84 = {Proceedings of the 3rd
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'84)}}
@String{pro-podc90 = {Proceedings of the 9th
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'90)}}
@String{pro-podc91 = {Proceedings of the 10th
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'91)}}
@String{pro-podc92 = {Proceedings of the 11th
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'92)}}
@String{pro-podc93 = {Proceedings of the 12th
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'93)}}
@String{pro-podc94 = {Proceedings of the 13th
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'94)}}
@String{pro-podc95 = {Proceedings of the 14th
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'95)}}
@String{pro-podc96 = {Proceedings of the 15th
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'96)}}

% Booktitle macro for the 16th PODC symposium.  The acronym is written
% with an apostrophe (PODC'97), matching the other PODC macros in this file.
@string{pro-podc97 = "Proceedings of the 16th
         Annual ACM Symposium on Principles of Distributed
         Computing (PODC'97)"}

% Booktitle macros for the 17th and 18th PODC symposia.
@String{pro-podc98 = {Proceedings of the 17th
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'98)}}
@String{pro-podc99 = {Proceedings of the 18th
                      Annual ACM Symposium on Principles of Distributed
                      Computing (PODC'99)}}

% Booktitle macros for the SRDS symposia (10th, 11th, 13th, 14th and 19th).
@String{pro-srds91   = {Proceedings of the 10th IEEE
                        Symposium on Reliable Distributed Systems (SRDS91)}}
@String{pro-srds92   = {Proceedings of the 11th IEEE
                        Symposium on  Reliable Distributed Systems (SRDS92)}}
@String{pro-srds94   = {Proceedings of the 13th IEEE
                        Symposium on  Reliable Distributed Systems (SRDS94)}}
@String{pro-srds95   = {Proceedings of the 14th IEEE
                        Symposium on  Reliable Distributed Systems (SRDS95)}}
@String{pro-srds2000 = {Proceedings of the 19th IEEE
                        Symposium on  Reliable Distributed Systems (SRDS2000)}}


% Booktitle macros for the Workshop on Self-Stabilizing Systems
% (2nd, 3rd, and the 1999 workshop held with the 19th ICDCS).
@String{pro-wss95 = {Proceedings of the 2nd Workshop
                     on Self-Stabilizing Systems}}
@String{pro-wss97 = {Proceedings of the 3rd Workshop
                     on Self-Stabilizing Systems}}
@String{pro-wss99 = {Proceedings of the 19th IEEE International Conference
                     on Distributed Computing Systems Workshop on
                     Self-Stabilizing Systems}}


% Booktitle macros for the ICDCS conferences (14th, 16th, 18th and 19th).
@String{pro-icdcs94 = {Proceedings of the 14th IEEE International
                       Conference on Distributed Computing Systems (ICDCS94)}}
@String{pro-icdcs96 = {Proceedings of the 16th IEEE International
                       Conference on Distributed Computing Systems (ICDCS96)}}
@String{pro-icdcs98 = {Proceedings of the 18th IEEE International
                       Conference on Distributed Computing Systems (ICDCS98)}}
@String{pro-icdcs99 = {Proceedings of the 19th IEEE International
                       Conference on Distributed Computing Systems (ICDCS99)}}

% Building blocks for the joint ASA/MA 2000 proceedings title.
% The values of asa and ma deliberately START WITH A SPACE: asama2000 glues
% them directly behind "Second" and " and Fourth", so that space is the only
% separator.  The result reads "Proceedings of the Second International
% Symposium on Agent Systems and Applications and Fourth International
% Symposium on Mobile Agents (ASA/MA2000)".  Do not trim these strings.
@string{asa = " International Symposium on Agent Systems and Applications"}
@string{ma = " International Symposium on Mobile Agents"}
@string{asama2000 = "Proceedings of the " # "Second" # asa # " and Fourth" # ma #
 " (ASA/MA2000)"}


%=======================================================================
% Publishers and their addresses:

% Publisher macros.  Each publisher name pub-X has a companion pub-X:adr
% macro holding its address.  Definition order is unchanged.
@string{pub-ACM             = {ACM Press, New York}}
@string{pub-ACM:adr         = {New York, NY 10036, USA}}
@string{pub-AW              = {Ad{\-d}i{\-s}on-Wes{\-l}ey, Reading, MA}}
@string{pub-AW:adr          = {Reading, MA, USA}}
@string{pub-BENCUM          = {Benjamin/Cummings Pub. Co.}}
@string{pub-BENCUM:adr      = {Redwood City, CA, USA}}
@string{pub-IEEE            = {IEEE Computer Society Press}}
@string{pub-IEEE-CSP        = {IEEE Computer Society Press}}
@string{pub-IEEE-CSP:adr    = {Los Alamitos, CA, USA}}
@string{pub-IEEE:adr        = {1109 Spring Street, Suite 300, Silver
                               Spring, MD 20910, USA}}
@string{pub-ITCP            = {International Thomson Computer Press}}
@string{pub-ITCP:adr        = {20 Park Plaza Suite 1001, Boston,
                               MA 02116 USA}}
@string{pub-ITP             = {International Thomson Publishing}}
@string{pub-ITP:adr         = {5101 Madison Road, Cincinnati, OH
                               45227, USA}}
@string{pub-MH              = {McGraw-Hill}}
@string{pub-MH:adr          = {New York, NY, USA}}
@string{pub-MIT             = {MIT Press}}
@string{pub-MIT:adr         = {Cambridge, MA, USA}}
@string{pub-PH              = {Pren{\-}tice-Hall}}
@string{pub-PH:adr          = {Englewood Cliffs, NJ, USA}}
@string{pub-SUCSLI          = {Stanford University Center for the
                               Study of Language and Information}}
@string{pub-SUCSLI:adr      = {Stanford, CA, USA}}
@string{pub-SV              = {Spring{\-}er-Ver{\-}lag}}
@string{pub-SV:adr          = {Berlin, Germany~/ Heidelberg,
                               Germany~/ London, UK~/ etc.}}
@string{pub-TEXPLORATOR     = {The {\TeX}plorators Corporation}}
@string{pub-TEXPLORATOR:adr = {3701 W. Alabama, Suite 450-273,
                               Houston, TX 77027, USA}}
@string{pub-USENIX          = {USENIX}}
@string{pub-USENIX:adr      = {Berkeley, CA, USA}}
@string{pub-VNR             = {Van Nostrand Reinhold}}
@string{pub-VNR:adr         = {New York, NY, USA}}
@string{pub-WORLD-SCI       = {World Scientific Publishing
                               Co. Pte. Ltd.}}
@string{pub-WORLD-SCI:adr   = {P. O. Box 128, Farrer Road,
                               Singapore 9128}}

%=======================================================================
% Series abbreviations:

% Series macro for Lecture Notes in Computer Science.
@string{ser-LNCS = {Lecture Notes in Computer Science}}

%=======================================================================
% Bibliography entries.

% Floyd (1967), conference paper; the annotation marks it as still to be obtained.
@inproceedings{Floyd:1967:AMP,
  author    = {R. W. Floyd},
  editor    = {J. T. Schwartz},
  title     = {Assigning meaning to programs},
  booktitle = {Mathematical aspects of computer science: Proc.
               American Mathematics Soc. symposia},
  volume    = {19},
  pages     = {19--31},
  publisher = {American Mathematical Society},
  address   = {Providence RI},
  year      = {1967},
  annote    = {[to get] first idea of termination function to prove
               termination of algorithms.}
}

% Dijkstra's CACM note that introduced self-stabilization.
% The title is spelled with the hyphen of the published paper
% ("Self-stabilizing"), and the month of issue 17(11) is given through
% the standard month macro.
@article{Dijkstra:1974:SSS,
  author  = {Edsger W. Dijkstra},
  title   = {Self-stabilizing systems in spite of distributed
             control},
  journal = j-CACM,
  volume  = 17,
  number  = 11,
  month   = nov,
  year    = 1974,
  pages   = {643--644},
  annote  = {Standard reference to the introduction of the notion
             of self-stabilization into computer science.}
}

% Manna and Pnueli (1974), Acta Informatica; annotated as still to be obtained.
@article{Manna:1974:AAT,
  author  = {Zohar Manna and Amir Pnueli},
  title   = {Axiomatic approach to total correctness of programs},
  journal = {Acta Informatica},
  year    = {1974},
  volume  = {3},
  pages   = {243--263},
  annote  = {[to get] Call termination function ``convergence function''.}
}



% Niemann (1974), German-language book.  The ALT/OPT fields are empty
% placeholders kept from the original entry.
@book{Niemann:1974:MDM,
  author     = "H. Niemann",
  ALTeditor  = "",
  title      = "{Methoden der Mustererkennung}",
  publisher  = "Akademische Verlagsgesellschaft",
  address    = "Frankfurt",
  year       = "1974",
  OPTkey     = "",
  OPTvolume  = "",
  OPTnumber  = "",
  OPTseries  = "",
  OPTedition = "",
  OPTmonth   = "",
  OPTnote    = "",
  annote     = "[Angabe von Armin]"
}

% Dijkstra (1975), the guarded-commands paper in CACM 18(8).
@article{Dijkstra:1975:GCN,
  author    = "Edsger W. Dijkstra",
  title     = {Guarded commands, nondeterminacy, and formal
               derivation of programs},
  journal   = j-CACM,
  volume    = 18,
  number    = 8,
  pages     = {453--457},
  month     = aug,
  year      = 1975,
  OPTannote = ""
}

% Katz and Manna (1975), Acta Informatica 5(4); annotated as still to be obtained.
@article{Katz:1975:CLT,
  author  = {Shmuel M. Katz and Zohar Manna},
  title   = {A closer look at termination},
  journal = {Acta Informatica},
  volume  = {5},
  number  = {4},
  pages   = {333--352},
  month   = dec,
  year    = {1975},
  annote  = {[to get] A comparison of four termination proving methods.}
}

% Avizienis' 1976 survey of fault-tolerant systems.
% The hacek in the surname is written as a BibTeX "special character"
% (a top-level brace group starting with a control sequence) so that
% sorting and label generation treat it as a single letter.
@Article{Avizienis:1976:FTS,
  author =       "Algirdas Avi{\v{z}}ienis",
  title =        "Fault-tolerant systems",
  OPTcrossref =  "",
  OPTkey =       "",
  journal =      j-IEEE-TRANS-COMP,
  year =         "1976",
  volume =       "25",
  number =       "12",
  pages =        "1304--1312",
  month =        dec,
  OPTnote =      "",
  annote =       "This is a good and surprisingly advanced survey of
                  fault tolerance issues (mainly in hardware) as of
                  1976. The main points include comparing the
                  traditional `fault intolerant' approach which aims
                  on taking only the most reliable components and
                  putting them together without employing redundancy
                  and relying on manual maintenance in case of
                  failures, with the fault tolerant approach, which
                  uses protective redundancy. While the former can be
                  less costly in many situations, the latter is source
                  for higher dependability figures and has
                  psychological advantages if human lives could be
                  endangered by the system. However, the two
                  approaches are complementary!  Furthermore,
                  Avizienis describes three aspects of fault tolerance
                  that have to be dealt with: (1) identification and
                  characterization of the fault set to be tolerated,
                  (2) development and choice of redundancy techniques,
                  (3) analytic or experimental prediction of the
                  effectiveness of the techniques. He also classifies
                  faults by duration, extent and value, and identifies
                  three forms of redundancy: hardware, software and
                  time. He gives a first notion of the two necessary
                  steps of detection and correction (see
                  \cite{Arora:1998:CDM}) and a lot of examples of
                  fault tolerant systems up to the year 1976. Overall,
                  a rich and despite its age still insight-heavy
                  paper."
}

% Denning (1976), lattice model of secure information flow, CACM 19(5).
% The abstract, keywords and treatment fields are carried along unchanged;
% the annotation marks the paper as still to be read.
@article{Denning:1976:LMS,
  author    = {Dorothy E. Denning},
  title     = {A Lattice Model of Secure Information Flow},
  journal   = j-CACM,
  year      = {1976},
  month     = may,
  volume    = {19},
  number    = {5},
  pages     = {236--243},
  OPTnote   = {Papers from the Fifth ACM Symposium on Operating
               Systems Principles (Univ. Texas, Austin, Tex., 1975).},
  abstract  = {Mechanisms that guarantee secure information flow in a
               computer system are discussed. These mechanisms are
               examined within a mathematical framework suitable for
               formulating the requirements of secure information flow
               among security classes. The central component of the
               model is a lattice structure derived from the security
               classes and justified by the semantics of information
               flow. The model provides a unifying view of all systems
               that restrict information flow, enables a
               classification of them according to security
               objectives, and suggests some new approaches. It also
               leads to the construction of automatic program
               certification mechanisms for verifying the secure flow
               of information through a program.},
  keywords  = {computer operating systems; data processing; lattice;
               mathematical models; program certification; secure
               information flow; security; security classes; security
               of data},
  treatment = {A Application; T Theoretical or Mathematical},
  annote    = {[to read]}
}

% Dijkstra's book "A Discipline of Programming".
% Publisher and address come from the pub-PH / pub-PH:adr macros defined
% in the publisher table of this file, instead of a second literal spelling
% of the publisher name and an empty address placeholder.
@Book{Dijkstra:1976:DP,
  author = 	 {Edsger W. Dijkstra},
  title = 	 {A Discipline of Programming},
  publisher = 	 pub-PH,
  address = 	 pub-PH:adr,
  year = 	 {1976},
  OPTkey = 	 {},
  OPTvolume = 	 {},
  OPTnumber = 	 {},
  OPTseries = 	 {},
  OPTedition = 	 {},
  OPTmonth = 	 {},
  OPTnote = 	 {},
  annote = 	 {Great book.}
}


% Lamport (1977), correctness proofs for multiprocess programs;
% the annotation credits it with the terms "safety" and "liveness".
@article{Lamport:1977:PCM,
  author  = "Leslie Lamport",
  title   = "Proving the correctness of multiprocess programs",
  journal = j-IEEE-TRANS-SOFTW-ENG,
  volume  = {3},
  number  = 2,
  pages   = {125--143},
  month   = mar,
  year    = 1977,
  OPTkey  = "",
  annote  = {First definition of terms ``safety'' and
             ``liveness''. What else?}
}

% Pnueli's FOCS-77 paper on temporal logic; annotated as still to be read.
% The publisher is taken from the pub-IEEE-CSP macro (same text as the
% former literal), and the annotation typo "transformationel" is corrected.
@InProceedings{Pnueli:1977:TLP,
  author =       "Amir Pnueli",
  title =        "The temporal logic of programs",
  booktitle =    "Proceedings of the 18th IEEE Symposium on the
                 Foundations of Computer Science (FOCS-77)",
  address =      "Providence, Rhode Island",
  publisher =    pub-IEEE-CSP,
  organization = "IEEE",
  month =        oct # " 31--" # nov # " 2",
  year =         "1977",
  pages =        "46--57",
  annote =       "[to read] Presents the idea of reactive systems and
                  temporal logic in contrast to transformational
                  systems using Hoare Logic."
}

% Bartlett (1978), HICSS-11 paper; the annotation calls it a description
% of the TANDEM system.
@inproceedings{Bartlett:1978:ANO,
  author    = {J. F. Bartlett},
  title     = {A {``NonStop''} operating system},
  booktitle = {Proceedings of the 11th Hawaii International Conference on System Sciences},
  year      = {1978},
  volume    = {3},
  annote    = {description of TANDEM system.}
}

% Lamport's paper on logical time and the "happened before" relation,
% CACM 21(7).  Annotation typos fixed: "happended" and "lead" (for "led").
@Article{Lamport:1978:TCO,
  author =       {Leslie Lamport},
  title =        {Time, clocks and the ordering of events in a
                  distributed system},
  journal =      j-CACM,
  year =         1978,
  OPTkey =       {},
  volume =       {21},
  number =       {7},
  month =        jul,
  pages =        {558--565},
  OPTnote =      {},
  annote =       "A famous and well-readable paper on causality and
                  possible causal dependencies in distributed
                  systems. Lamport is first to introduce the
                  ``happened before'' relation (which corresponds to
                  causality) and proposes the use of logical time
                  instead of real time in distributed systems. He
                  characterises the relation as being a partial order
                  and shows how his logical time can be used to do
                  mutual exclusion. Work has subsequently led to
                  vector time (Fidge/Mattern, cite?)."
}

% Wensley et al. (1978), the SIFT paper in Proceedings of the IEEE 66(10);
% annotated as still to be read.
@article{Wensley:1978:SDA,
  author  = {J. H. Wensley and L. Lamport and J. Goldberg and M. W.
             Green and K. N. Levitt and P. M. Melliar-Smith and R.
             E. Shostak and C. B. Weinstock},
  title   = {{SIFT}: Design and analysis of a fault-tolerant
             computer for aircraft control},
  journal = {Proceedings of the IEEE},
  year    = {1978},
  month   = oct,
  volume  = {66},
  number  = {10},
  pages   = {1240--1255},
  annote  = {[to read]}
}





% Lamport (1980), POPL paper contrasting linear-time and branching-time
% temporal logic (per the annotation).
@inproceedings{Lamport:1980:SSN,
  author    = {Leslie Lamport},
  title     = {`{Sometimes}' is sometimes `not never'},
  booktitle = {Proceedings of SIGPLAN-80, 7th ACM Symposium on
               Principles of Programming Languages},
  address   = {Las Vegas, Nevada},
  pages     = {174--185},
  year      = {1980},
  annote    = {Discusses a difference between branching time and
               linear time notions of temporal logic. In linear
               time `not eventually $\neg\phi$' is equivalent to
               `always $\phi$'. This is not true in branching
               time. Lamport discusses the assumptions made by
               computer scientists about temporal properties:
               ``The logic of linear time was used by Pnueli
               [...], while the logic of branching time seems
               to be the one used by most computer scientists
               for reasoning about temporal concepts.'' As every
               paper by Lamport, extremely well readable stuff!}
}

% Pease, Shostak and Lamport (1980), JACM 27(2).
% The journal name comes from the j-ACM macro defined in the journal table
% of this file (identical text), so the name is spelled in one place only.
@Article{Pease:1980:RAP,
  author =       "M. Pease and R. Shostak and L. Lamport",
  title =        "Reaching Agreements in the Presence of Faults",
  journal =      j-ACM,
  volume =       "27",
  number =       "2",
  pages =        "228--234",
  month =        apr,
  year =         "1980",
  annote =         "This paper is similar to their 1982 publication
                 \cite{Lamport:1982:BGP}, but contains a rigorous proof
                 of the impossibility of Byzantine agreement for the
                 case $n=3$, $t=1$. As usual, $n$ is the total number of
                 processes and $t$ is the number of faulty processes.",
}



% Burris and Sankappanavar (1981), universal algebra textbook; the note
% points to a revised online edition.  ALT/OPT fields are empty placeholders.
@book{Burris:1981:CUA,
  author     = "Stanley N. Burris and H. P. Sankappanavar",
  ALTeditor  = "",
  title      = "A course in universal algebra",
  publisher  = pub-SV,
  year       = "1981",
  note       = "Revised edition online at
                \url{http://thoralf.uwaterloo.ca/htdocs/ualg.html}",
  OPTkey     = "",
  OPTvolume  = "",
  OPTnumber  = "",
  OPTseries  = "",
  OPTaddress = "",
  OPTedition = "",
  OPTmonth   = "",
  OPTannote  = ""
}


% Gries (1981), "The Science of Programming".  OPT fields are empty placeholders.
@book{Gries:1981:SP,
  author     = "David Gries",
  title      = "The Science of Programming",
  publisher  = pub-SV,
  year       = "1981",
  OPTkey     = "",
  OPTvolume  = "",
  OPTnumber  = "",
  OPTseries  = "",
  OPTaddress = "",
  OPTedition = "",
  OPTmonth   = "",
  OPTnote    = "",
  OPTannote  = ""
}


% Pnueli (1981), temporal semantics of concurrent programs, TCS 13.
% Annotation typos fixed: "by execution a single" -> "by executing a single",
% "hierachric" -> "hierarchical".
@Article{Pnueli:1981:TSC,
  author = 	 {Amir Pnueli},
  title = 	 {The temporal semantics of concurrent programs},
  journal = 	 {Theoretical Computer Science},
  year = 	 {1981},
  OPTkey = 	 {},
  volume = 	 {13},
  OPTnumber = 	 {},
  pages = 	 {45--60},
  OPTmonth = 	 {},
  OPTnote = 	 {},
  annote = 	 {The semantics of a concurrent program specifies the
    set of execution sequences which are admissible as proper execution
    sequences of the program. Two main things must hold: (1) every state
    is obtained from its predecessor by executing a single enabled atomic
    action in one process, (2) no process which is infinitely often
    enabled will be infinitely often delayed (strong fairness). With
    this type of semantics one can introduce temporal operators ``always''
    and ``eventually'' which can be used to precisely reformulate the 
    usual program properties like termination, partial and total correctness,
    deadlock/starvation freedom etc.  Also, proving that a program 
    possesses some property reduces to proving a set inclusion. The logic
    still contains a ``next state'' operator which is argued against
    by Lamport in \cite{Lamport:1983:WGT} because it doesn't support
    hierarchical proofs. Lamport regards this paper as the first to consider 
    identifying programs with execution sequences and thus place programs
    and specifications onto the same formal level \cite{Abadi:1993:CS}.}
}

% Rabin (1981), Harvard technical report TR-81 on oblivious transfer;
% annotated as still to be obtained.
@techreport{Rabin:1981:HES,
  author      = {M. Rabin},
  title       = {How to exchange secrets by oblivious transfer},
  institution = {Harvard Aiken Computation Laboratory},
  number      = {TR-81},
  year        = {1981},
  annote      = {A probabilistic exchange protocol similar to
                 \cite{Blum:1983:HES}. [to get]}
}



% Chang (1982), the Echo-algorithm reference; annotated as still to be obtained.
% OPT fields are empty placeholders.
@article{Chang:1982:EAD,
  author    = "E. J.-H. Chang",
  title     = "Echo algorithms: {Depth} parallel operations on
               general graphs",
  journal   = j-IEEE-TRANS-SOFTW-ENG,
  volume    = "SE-8",
  pages     = "391--401",
  year      = "1982",
  OPTkey    = "",
  OPTnumber = "",
  OPTmonth  = "",
  OPTnote   = "",
  annote    = "[to get] Reference to Echo algorithm"
}

% Girault (1982), chapter in an edited volume; annotated as still to be read.
% OPT fields are empty placeholders.
@incollection{Girault:1982:PPC,
  author      = "C. Girault",
  title       = "Proof of protocols in the case of failures",
  editor      = "J. Evans",
  booktitle   = "Parallel processing systems. An advanced course",
  pages       = "121--139",
  publisher   = "Cambridge University Press",
  year        = "1982",
  OPTcrossref = "",
  OPTkey      = "",
  OPTvolume   = "",
  OPTnumber   = "",
  OPTseries   = "",
  OPTtype     = "",
  OPTchapter  = "",
  OPTaddress  = "",
  OPTedition  = "",
  OPTmonth    = "",
  OPTnote     = "",
  annote      = "[to read]"
}

% Lamport, Shostak and Pease (1982), the Byzantine Generals paper, TOPLAS 4(3).
% Annotation typo fixed: "there algorthm" -> "their algorithm".
% NOTE(review): "2n+1" in the annotation may be meant as "2m+1"; it is left
% as written -- confirm against the paper before changing it.
@Article{Lamport:1982:BGP,
  author =       "L. Lamport and R. Shostak and M. Pease",
  title =        "The {Byzantine} generals problem",
  OPTcrossref =  "",
  OPTkey =       "",
  journal =      j-TOPLAS,
  year =         "1982",
  volume =       "4",
  number =       "3",
  pages =        "382--401",
  month =        jul,
  OPTnote =      "",
  annote =       "This is one of the all time classic papers in fault
                  tolerant distributed computing: the Byzantine
                  Generals Problem (BGP) is presented and scenarios
                  are discussed where it is solvable and
                  unsolvable. The BGP consists of a set of nodes in a
                  completely connected network, one of which is called
                  the commander and all others are lieutenants. There
                  can be a certain number m of traitors in the set of
                  nodes. The problem is that the commander sends an
                  order to all lieutenants and (1) all lieutenants
                  must obey the same order, and (2) if the commander
                  is not a traitor then every other non-traitor obeys
                  the order he sends. The real world scenarios where
                  this problem exists are those where a set of
                  replicated processors must act in unison despite the
                  fact that all get different input (high reliability
                  systems). It turns out that the problem is
                  unsolvable if there are no more than 3m nodes in the
                  network. If messages can be signed, then it remains
                  unsolvable if half the nodes can be traitors. On the
                  other hand, if there are 3m+1 nodes (or 2n+1
                  respectively), then the BGP is solvable. Two
                  algorithms are given. They are presented and proved
                  in a recursive/inductive fashion which is quite
                  stunning. The authors remark, that the problem is
                  unsolvable in asynchronous systems (where there is
                  no possibility of implementing synchronized clocks
                  in the presence of faults). Also, their algorithm for
                  the 3m+1 case seems to be optimal although it
                  requires a message path of m+1 and has a high
                  message complexity. The authors argue that extremely
                  high reliability has its cost. Byzantine behaviour
                  is implicitly modeled by always choosing the worst
                  choice, or considering all choices and choosing the
                  worst."
}

@InProceedings{Ben-Or:1983:AAF,
  author =       "Michael Ben-Or",
  title =        "Another Advantage of Free Choice: Completely
                 Asynchronous Agreement Protocols",
  booktitle =    "Proc. Second Ann. ACM Symp. on Principles of
                 Distributed Computing",
  year =         "1983",
  pages =        "27--30",
  annote =       "Ben-Or's probabilistic algorithm for asynchronous
                 Byzantine agreement, discussed in
                 Section~\ref{sec-byzantine}, was one of the first
                 published solutions to the problem, and remains the
                 simplest. Processes toss coins independently to reach
                 consensus on a value. His algorithm requires that less
                 than one-fifth of the processes are faulty for
                 correctness to be guaranteed. The expected number of
                 rounds is exponential in the number of processes $n$,
                 but becomes a constant when the number of faulty
                 processes is $O(\sqrt{n})$.",
}

@Article{Blum:1983:HES,
  author =       "Manuel Blum",
  title =        "How to Exchange (Secret) Keys",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "175--193",
  month =        may,
  year =         "1983",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  note =         "Previously published in ACM STOC '83 proceedings,
                 pages 440--447.",
  annote = "A protocol is presented to fairly exchange secrets using
  number theoretic means. Two parties, Alice and Bob, are assumed to
  have equal computing capabilities and equal knowledge of
  algorithms. There is no need for a trusted intermediary and no need
  for a judge outside of the system. There is a negligible probability
  of cheating. The idea is to use gradual exchange and after
  exchanging an individual bit, do some sort of zero-knowledge-proof
  to witness that the bit is actually a valid bit. This is done by a
  complicated challenge response type of method which I do not
  understand (quadratic residues, etc. involved). The probability that
  either can cheat the protocol can be made arbitrarily
  small. However, the usual problems with gradual exchange protocols
  still exist. Section 13 presents some interesting ideas regarding
  pricing of gradually exchanged bits. Claims to be similar to
  an early TR of Rabin \cite{Rabin:1981:HES}."
}

@Article{Lamport:1983:SCP,
  author =       {Leslie Lamport},
  title =        {Specifying concurrent program modules},
  journal =      j-TOPLAS,
  year =         {1983},
  volume =       {5},
  number =       {2},
  pages =        {190--222},
  month =        apr,
  OPTannote =    {to get}
}

@InProceedings{Lamport:1983:WGT,
  author =       "Leslie Lamport",
  title =        "What good is Temporal Logic?",
  booktitle =    "Proceedings of the {IFIP} Congress on Information
                 Processing",
  year =         "1983",
  editor =       "R. E. A. Mason",
  pages =        "657--667",
  publisher =    "North-Holland",
  address =      "Amsterdam",
  annote = "This is a more informal and easy going introduction into the
    merits of temporal logic than \cite{Lamport:1983:SCP}, much in the
    spirit of a later and more refined exposition
    \cite{Lamport:1989:SAS}. Lamport proposes a formal language
    because ``natural languages are very expressive and very
    imprecise'' while ``formal languages are not very expressive but
    very precise.'' The distinction is again that of safety and
    liveness properties, where safety properties can be used to reason
    about real-time behavior if the notion of a clock is added. The
    concept of stuttering is motivated and other temporal logic
    formalisms as of 1983 are briefly surveyed. Finally, Lamport
    elaborates on the hierarchy of programming languages, starting
    from high level specifications and ending at the quantum level of
    electrons. Temporal logic can provide a framework for reasoning at
    all these levels.",
}

@Article{Schlichting:1983:FSP,
  author =       "Richard D. Schlichting and Fred B. Schneider",
  title =        "Fail-stop processors: {An} approach to designing
                  fault-tolerant computing systems",
  journal =      j-TOCS,
  year =         "1983",
  volume =       "1",
  number =       "3",
  pages =        "222--238",
  month =        aug,
  annote =       "[to read]"
}




@Book{Strohrmann:1983:AMM,
  author =       {G. Strohrmann},
  title =        {{Anlagensicherung mit Mitteln der MSR-Technik}},
  publisher =    {Oldenbourg},
  year =         {1983},
  address =      {M\"unchen},
  annote =       {[Angabe von Armin]}
}

@InProceedings{Broder:1984:FCM,
  author       = {Andrei Z. Broder and Danny Dolev},
  title        = {Flipping coins in many pockets ({Byzantine} agreement
                  on uniformly random values)},
  booktitle    = {25th Annual Symposium on Foundations of Computer
                  Science},
  organization = {IEEE},
  address      = {Singer Island, Florida},
  pages        = {157--170},
  month        = "24--26 " # oct,
  year         = {1984},
  annote       = {Discusses randomized Byzantine agreement where a set of
                  processes agree on a common bit using a random coin. Gives
                  algorithm which works if the faulty processes are not the
                  majority. Extends the impossibility result for
                  deterministic consensus by showing that there is no
                  Byzantine agreement protocol tolerant against $t$
                  fail-stop faults that works in less than $t+1$ rounds.}
}

@Article{Dijkstra:1983:DTD,
  author          = {Edsger W. Dijkstra and W. H. J. Feijen and A. J. M.
                     {van Gasteren}},
  title           = {Derivation of a Termination Detection Algorithm for
                     Distributed Computations},
  journal         = {Information Processing Letters},
  journalabr      = {Inf Process Lett},
  year            = {1983},
  month           = jun,
  day             = {10},
  volume          = {16},
  number          = {5},
  pages           = {217--219},
  coden           = {IFPLAT},
  ISSN            = {0020-0190},
  mrclass         = {68B05 (68C05)},
  mrnumber        = {84m:68005},
  bibdate         = {Wed Nov 11 12:16:26 MST 1998},
  acknowledgement = ack-nhfb,
  classification  = {723; B6210L (Computer communications); C5620
                     (Computer networks and techniques); C6150J (Operating
                     systems)},
  corpsource      = {Burroughs, AL Nuenen, Netherlands},
  keywords        = {computer programming; distributed computations;
                     distributed processing; networks; protocols;
                     termination detection algorithm},
  treatment       = {P Practical},
}

@InCollection{Echtle:1984:FSV,
  author =       "Klaus Echtle",
  title =        "{Fehlermodellierung} bei {Simulation} und
                  {Verifikation} {von} {Fehlertoleranz-Algorithmen}
                  {f\"ur} {Verteilte} {Systeme}",
  booktitle =    "{Software-Fehlertoleranz} und {-Zuverl\"assigkeit}",
  publisher =    pub-SV,
  year =         "1984",
  editor =       "F. Belli and S. Pfleger and M. Seifert",
  number =       "83",
  series =       "Informatik-Fachberichte",
  pages =        "73--88",
  OPTnote =      "(in German)",
  annote =       "Two types of fault models are described and
                  compared: low level fault specifications (LLFS,
                  `aufz{\"a}hlendes Fehlermodell') and high level fault
                  specifications (HLFS, `spezifizierendes
                  Fehlermodell'). LLFS consist of a detailed
                  description of what type of faults may happen and
                  when/where they are supposed to occur (e.g., send
                  omission etc.). They are well suited for simulation
                  and testing. HLFS are a high level description of
                  how a node's behavior changes in the presence of
                  faults. This is expressed at the interfaces between
                  nodes of a distributed system: usually the actions
                  at an interface reflect certain requirements of a
                  protocol specification. The occurrence of faults at
                  a node weakens these requirements. To an extreme
                  (Byzantine behavior), there are no restrictions on
                  what might happen at an interface. HLFS influence
                  interface specifications and are only suited for
                  verification purposes. Both LLFS and HLFS are
                  compared according to their suitability for
                  verification. Finally, the importance of hierarchic
                  fault modelling is stressed to master
                  complexity. This can be seen as an early predecessor
                  of the concept of multitolerance
                  \cite{Arora:1998:CDM}."
}

@PhdThesis{Hadzilacos:1984:IFT,
  author = {Vassos Hadzilacos},
  title  = {Issues of Fault Tolerance in Concurrent Computations},
  school = {Harvard University},
  year   = {1984},
  note   = {also published as Technical Report TR11-84.},
  annote = {First mentioning of send omission type of
            faults. Reference found in
            \cite{Schneider:1993:WGM,Hadzilacos:1994:MAF}.}
}

@Article{Lamport:1984:UTI,
  author =       "Leslie Lamport",
  title =        "Using Time Instead of Timeout for Fault-Tolerant
                 Distributed Systems",
  journal =      j-TOPLAS,
  volume =       "6",
  number =       "2",
  pages =        "254--280",
  year =         "1984",
  month =        apr,
  annote =       "[not by me:] processes are synchronized by clocks,
                 and the clocks
                 are synchronized using the Byzantine Generals solution.
                 Time intervals are used. [to get]",
}

@Article{Lundelius:1984:ULB,
  title =        "An Upper and Lower Bound for Clock Synchronization",
  author =       "Jennifer Lundelius and Nancy Lynch",
  pages =        "190--204",
  journal =      "Information and Control",
  month =        aug # "/" # sep,
  year =         "1984",
  volume =       "62",
  number =       "2/3",
  annote =       "Prove a result similar to \cite{Dolev:1986:PIA}: The
    clocks of $n$ processes cannot be deterministically synchronized
    more closely than $e(1-1/n)$, where $e$ is the uncertainty of message
    delivery times. The assumptions are clocks running at the same
    speed but initialized differently, the given uncertainty $e$,
    and no failures. The graph is completely connected. The result
    states how close clock values can be at the same real time,
    whereas \cite{Dolev:1986:PIA} characterize how close the
    real times can be when clocks show the same value. The idea of
    the proof is to construct runs which look the same to the
    processes but result in different clock values/real times at
    different points."
}

@TechReport{Shah:1984:DSS,
  pages =        "14",
  year =         "1984",
  type =         "Technical Report",
  number =       "TR84-624",
  title =        "Distributed Snapshots In Spite of Failures",
  author =       "Amitabh Shah and Sam Toueg",
  abstract =     "An extension of the Chandy-Lamport algorithm
                 ([Chan84]) to find global states of distributed systems
                 is presented where benign failures of processes and
                 channels are permitted. The scope of the algorithm in
                 detecting stable properties in distributed systems is
                 discussed. As an application, an algorithm to detect
                 deadlocks in failure-prone distributed systems is
                 presented.",
  institution =  "Cornell University, Computer Science Department",
  month =        jul,
  note =         "Revised February 1985",
  annote = "Extends the Chandy-Lamport snapshot algorithm
    \cite{Chandy:1985:DSD} to deal with crash-recover faults and
    message losses. The system model is the asynchronous one of
    \cite{Chandy:1985:DSD} and the algorithm uses a simple timeout
    mechanism to check the functional state of neighboring processes
    (today we call this an unreliable failure detector). Channels are
    FIFO and flushing messages are used just like in
    \cite{Chandy:1985:DSD}. However, due to obvious impossibilities
    the notion of a consistent cut must be weakened in a way that
    includes uncertainty. Termination is guaranteed by the timeout
    solution, but the result may be `uncertain' making it necessary to
    restart the algorithm again. It doesn't seem to be guaranteed that
    eventually a stable predicate is detected because of possible
    channel failures (what about false suspicions and virtual
    partitions?). Has this algorithm been published elsewhere?"
}

@Article{Spector:1984:SSP,
  author  = "Alfred Spector and David Gifford",
  title   = "The space shuttle primary computer system",
  journal = j-CACM,
  volume  = "27",
  number  = "9",
  pages   = "874--900",
  year    = "1984",
  annote  = "A detailed description of the computer system that
             runs the space shuttle."
}


@Article{Alpern:1985:DL,
  author  = "Bowen Alpern and Fred B. Schneider",
  title   = "Defining liveness",
  journal = j-IPL,
  volume  = "21",
  pages   = {181--185},
  year    = "1985",
  annote  = {Standard definitions of system properties, safety
             and liveness. Shows that every nontrivial system
             property can be expressed as an intersection of a
             safety property and a liveness property. Terms
             safety and liveness defined by Lamport
             \cite{Lamport:1977:PCM}.}
}




@Article{Bracha:1985:ACB,
  author  = {Gabriel Bracha and Sam Toueg},
  title   = {Asynchronous Consensus and Broadcast Protocols},
  journal = j-ACM,
  volume  = {32},
  number  = {4},
  pages   = {824--840},
  month   = oct,
  year    = {1985},
  annote  = {The authors investigate probabilistic consensus
             protocols for ``FLP'' model
             \cite{Fischer:1985:IDC}. Probabilities are
             introduced by making assumptions about the message
             subsystem, i.e. the probability that a node receives
             a message from all non-faulty nodes can be
             calculated. For the fail-stop model half of the
             nodes may be faulty to still achieve consensus with
             probability 1, for Byzantine faults at most one
             third may be faulty. The relevant protocols and an
             application to reliable broadcast are given.}
}

@Article{Chandy:1985:DSD,
  author =       {K. M. Chandy and Leslie Lamport},
  title =        {Distributed snapshots: determining global states of
                  distributed systems},
  journal =      j-TOCS,
  year =         {1985},
  volume =       {3},
  number =       {1},
  month =        feb,
  pages =        {63--75},
  annote =       {nicht kopiert}
}

@InProceedings{Coan:1985:DFS,
  author    = {Brian A. Coan and Danny Dolev and Cynthia Dwork and
               Larry Stockmeyer},
  title     = {The Distributed Firing Squad Problem (Preliminary
               Version)},
  booktitle = {Proceedings of the Seventeenth Annual {ACM} Symposium
               on Theory of Computing},
  address   = {Providence, Rhode Island},
  pages     = {335--345},
  month     = "6--8 " # may,
  year      = {1985},
  annote    = {[to read]}
}

@Article{Cristian:1985:RAF,
  author  = "Flaviu Cristian",
  title   = "A rigorous approach to fault-tolerant programming",
  journal = j-IEEE-TRANS-SOFTW-ENG,
  volume  = "11",
  number  = "1",
  pages   = {23--31},
  month   = jan,
  year    = "1985",
  annote  = {First idea of defining faults as spontaneous actions
             on an extended system space.}
}


@Article{Fischer:1985:IDC,
  author =       {Michael J. Fischer and Nancy A. Lynch and Michael S.
                  Paterson},
  title =        {Impossibility of distributed consensus with one
                  faulty process},
  journal =      j-ACM,
  year =         1985,
  volume =       32,
  number =       2,
  month =        apr,
  pages =        "374--382",
  OPTannote =    "Landmark paper in fault-tolerant distributed
                  computing. The system considered is completely
                  asynchronous, nodes may stop by halting (crash
                  failure) but may not exhibit hostile (Byzantine)
                  behaviour, the message system is reliable with a
                  reliable broadcast primitive, no synchronized clocks
                  or the possibility to detect failures are
                  assumed. The authors show that every non-trivial
                  execution can go on forever without reaching a
                  result, because it is in effect not possible to
                  distinguish a crashed node from one that is merely
                  very slow. The proof is very detailed and is based
                  on non-constructive methods that produce a
                  contradiction from opposite assumptions. (Proof is
                  explained in other words in \cite{Turek:1992:MFC}.)"
}


@Article{Halpern:1985:OPP,
  title =        "Optimal Precision in the Presence of Uncertainty",
  author =       "Joseph Y. Halpern and Nimrod Megiddo and Ashfaq A.
                 Munshi",
  pages =        "170--196",
  journal =      "Journal of Complexity",
  year =         "1985",
  month =        dec,
  volume =       "1",
  number =       "2",
  annote =       "Analyzes the imprecision inherent in distributed
    systems that have uncertain message delays. Takes the model of
    \cite{Dolev:1986:PIA} and wants to execute coordinated actions
    (instead of doing clock synchronization). Assumes that hardware
    clocks run at the same rate, yet may be initialized differently,
    and that messages have a maximum delivery delay. Basically enriches
    the lower bound of \cite{Dolev:1986:PIA}, and states that
    probabilistic algorithms can do no better (with certainty). Hmm,
    see \cite{Cristian:1989:PCS}. Investigates the situation in
    which there are Byzantine nodes. Gives an algorithm to compute
    optimal precision in cases without faults and bounded precision
    in cases with faults."
}

@Book{Hoare:1984:CSP,
  author    = {C. A. R. Hoare},
  title     = {Communicating Sequential Processes},
  year      = {1985},
  publisher = {Prentice-Hall},
}


@Article{Awerbuch:1985:CNS,
  author =       {Baruch Awerbuch},
  title =        {Complexity of Network Synchronization},
  journal =      j-ACM,
  year =         {1985},
  volume =       {32},
  number =       {4},
  pages =        {804--823},
  month =        oct
}


@InProceedings{Laprie:1985:DCF,
  author    = {J. C. Laprie},
  title     = {Dependable computing and fault tolerance: concepts
               and terminology},
  booktitle = pro-ftcs85,
  pages     = {2--11},
  month     = jun,
  year      = {1985},
  OPTannote = {[to read]}
}


@Article{Lamport:1985:SCP,
  author =       "Leslie Lamport and P. M. Melliar-Smith",
  title =        "Synchronizing Clocks in the Presence of Faults",
  journal =      j-ACM,
  volume =       "32",
  number =       "1",
  pages =        "52--78",
  month =        jan,
  year =         "1985",
  url =          "http://www.acm.org/pubs/toc/Abstracts/0004-5411/2457.html",
  abstract =     "Algorithms are described for maintaining clock
                 synchrony in a distributed multiprocess system where
                 each process has its own clock. These algorithms work
                 in the presence of arbitrary clock or process failures,
                 including ``two-faced clocks'' that present different
                 values to different processes. Two of the algorithms
                 require that fewer than one-third of the processes be
                 faulty. A third algorithm works if fewer than half the
                 processes are faulty, but requires digital
                 signatures.",
  keywords =     "algorithms; Byzantine failures; clocks, electric ---
                 Synchronization; computer programming --- Algorithms;
                 computer systems programming --- Multiprocessing
                 Programs; computer systems, digital; Fault Tolerant
                 Capability; interactive convergence algorithm;
                 reliability; theory; verification; Zeitliche Ordnung",
  annote =       "Investigates Byzantine clock synchronization. Surveyed
                 in \cite{Ramanathan:1990:FCS}. [to get]"
}


@Article{Arora:1986:DTD,
  author =       {Rada Krishan Arora and S. P. Rana and M. N. Gupta},
  title =        {Distributed termination detection algorithm for
                  distributed computations},
  journal =      j-IPL,
  year =         1986,
  volume =       22,
  month =        may,
  pages =        "311--314",
  annote = "See also \cite{Tan:1986:CDT,Arora:1988:MCD}."
}



@Article{Berglund:1986:IV,
  author  = {Eric J. Berglund},
  title   = {An introduction to the {V}-system},
  journal = {IEEE Micro},
  year    = {1986},
  month   = aug,
  volume  = {6},
  number  = {4},
  pages   = {35--52},
  annote  = {[to read]}
}

@Article{Chandy:1986:HPL,
  author  = {K. M. Chandy and Jayadev Misra},
  title   = {How processes learn},
  journal = j-DC,
  volume  = {1},
  pages   = {40--52},
  year    = {1986},
  annote  = {A formal article on knowledge of processes and how
             it is gained and lost. The notion of knowledge is
             defined using the concept of isomorphism. Two system
             computations are isomorphic with respect to a
             process if the behaviour of the process is identical
             in both computations. This means essentially that
             ``a process cannot distinguish between them''. A
             fact that is valid in all indistinguishable
             computations is said to be known by a
             process. An important type of predicate is a local
             predicate (which is affected merely by state changes
             on one process). These results can be applied to
             situations in which the question is asked: Is a
             process unsure about a fact? These scenarios include
             the impossibility to detect whether a process has
             crashed. The theory is also applied to show that
             there must be causal message chains in mutual
             exclusion protocols and that the complexity of
             termination detection is at least as large as the
             message complexity of the underlying computation.}
}

@InProceedings{Cleve:1986:LSC,
  author    = {Richard Cleve},
  title     = {Limits on the Security of Coin Flips when Half the
               Processors Are Faulty (Extended Abstract)},
  booktitle = {Proceedings of the Eighteenth Annual {ACM} Symposium
               on Theory of Computing},
  address   = {Berkeley, California},
  pages     = {364--369},
  month     = "28--30 " # may,
  year      = {1986},
  annote    = {The 2-processor-bit-selection problem is to devise a
               protocol between two processes $A$ and $B$ with the
               following properties: $A$ and $B$ start with a random
               bit value and after termination of the protocol both
               processes output a value $a$ and $b$, respectively,
               where $a=b$ (agreement). Processes internally have
               access to a random variable. A weaker definition of
               agreement states, that the probability that $a=b$
               must be bounded from below by $1-O(1/n^k)$ where $n$
               and $k$ are not clear to me. A
               2-processor-bit-selection-scheme is secure if the
               protocol achieves the weaker definition of agreement
               (or better) even in the case where one process is
               replaced by a faulty one. The author gives an
               impossibility result stating that there exists no
               secure 2-processor-bit-selection protocol (Section
               2.2). (I didn't get the idea behind the proof.) This
               result is extended to a definition of an
               $s$-processor-bit-selection scheme. The new result
               states that it is impossible to reach (weak)
               agreement if $\lceil s/2 \rceil$ of the processors
               are faulty. The paper must be seen in the context of
               probabilistic Byzantine agreement, I suppose.}
}

@InProceedings{Cristian:1986:CSP,
  author    = {F. Cristian and H. Aghili and R. Strong},
  title     = {Clock Synchronization in the Presence of Omission and
               Performance Faults},
  booktitle = pro-ftcs86,
  publisher = pub-IEEE,
  address   = {Vienna, Austria},
  pages     = {218--223},
  year      = {1986},
  annote    = {Revised version read as \cite{Cristian:1994:CSP}.}
}

@Article{Dolev:1986:PIA,
  author  = "Danny Dolev and Joseph Y. Halpern and H. Raymond Strong",
  title   = "On the possibility and impossibility of achieving clock
             synchronization",
  journal = "Journal of Computer and System Sciences",
  volume  = "32",
  number  = "2",
  pages   = "230--250",
  month   = apr,
  year    = "1986",
  annote  = "The authors prove that clock synchronization is impossible
             without authentication if at least one third of the
             processors are faulty.  They also give a lower bound on the
             precision of local clocks: Define $U$ to be the maximum
             uncertainty in the network, i.e. the maximum difference
             between minimum and maximum message transmission time for
             any pair of directly connected processes.  The imprecision
             of local clocks is at least half the uncertainty, i.e. there
             is no algorithm that synchronizes clocks of two adjacent
             processes closer than $U/2$. An extended result appears in
             \cite{Halpern:1985:OPP}. A result similar to this can be
             found in \cite{Lundelius:1984:ULB} (see the discussion
             there)."
}

@Article{Dolev:1986:RAA,
  author  = {Danny Dolev and Nancy A. Lynch and Shlomit S. Pinter
             and Eugene W. Stark and William E. Weihl},
  title   = {Reaching approximate agreement in the presence of faults},
  journal = j-ACM,
  volume  = {33},
  number  = {3},
  pages   = {499--516},
  month   = jul,
  year    = {1986},
  annote  = {[to read]}
}

@Article{Fischer:1986:EIP,
  author  = {Michael J. Fischer and Nancy A. Lynch and Michael
             S. Paterson},
  title   = {Easy impossibility proofs for distributed consensus
             problems},
  journal = j-DC,
  volume  = {1},
  pages   = {26--39},
  year    = {1986},
  annote  = {[to read]}
}

@Book{Francez:1986:F,
  author =       "Nissim Francez",
  title =        "Fairness",
  publisher =    pub-SV,
  series =       "Texts and Monographs in Computer Science",
  year =         "1986",
  annote =       "A thorough book on many notions of fairness in
                 scheduling concurrent actions. Possibly outdated
                 because many new notions seem to have appeared (any
                 references?)"
}

@Article{Liskov:1986:SDP,
  author =       "Barbara Liskov and William Weihl",
  title =        "Specifications of Distributed Programs",
  journal =      j-DC,
  publisher =    pub-SV,
  year =         "1986",
  volume =       "1",
  pages =        "102--118",
  annote = "An early advocate of having two separate sets of
    specifications: one for the normal operation and a weaker one for
    ``abnormal'' behavior (the tolerance specification of
    \cite{Gaertner:1999:ESD}). The authors argue that this is user
    friendly and also simplifies the specifications. Several examples
    of such specifications are given (which I did not look at in
    detail). The conclusions contain a somewhat misleading discussion
    on why liveness is not the correct property to describe abnormal
    behavior. Rather, the likelihood of abnormal behavior should be
    specified (but this is a point of future work). At the end, the
    authors indicate that having a tolerance specification eases the
    understanding of implementation constraints and so a tolerance
    specification is also of use to implementors. The tolerance
    specification can be seen as the ``first refinement'' of the
    original specification.",
}

@Article{Moses:1986:CHO,
  author =       "Yoram Moses and Danny Dolev and Joseph Y. Halpern",
  title =        "Cheating husbands and other stories: {A} case study of
                  knowledge, action, and communication",
  journal =      j-DC,
  year =         "1986",
  volume =       "1",
  pages =        "167--176",
  annote =       "The authors again take the cheating husbands puzzle
                  to show subtle interactions between knowledge,
                  action, and communication in distributed
                  systems. They discuss the cases of asynchronous
                  communication, synchronous communication, weakly
                  synchronous communication with bound b, and
                  asymmetry in communication (ring topology). The
                  relationship to eventual common knowledge, common
                  knowledge and b-common knowledge are
                  shown. Moreover, in the synchronous case, faulty
                  nodes can complicate the matter again (disobedient
                  wives). This paper is shorter and thus a little more
                  introductory than a later one
                  \cite{Halpern:1990:KCK}."
}

@Article{Myers:1986:CSF,
  author  = "W. Myers",
  title   = "Can software for the strategic defense initiative
             ever be error free?",
  journal = "IEEE Computer",
  volume  = "19",
  number  = "11",
  month   = nov,
  year    = "1986",
  annote  = "Presents figure that there are about 3.3 software errors
             per 1000 LoC. Peter G. Neumann comments on this in
             `Inside Risks' in late 2000 CACM."
}


@Article{Perry:1986:DAP,
  author  = {Kenneth J. Perry and Sam Toueg},
  title   = {Distributed Agreement in the Presence of Processor and
             Communication Faults},
  journal = j-IEEE-TRANS-SOFTW-ENG,
  volume  = {12},
  number  = {3},
  pages   = {477--482},
  month   = mar,
  year    = {1986},
  annote  = {First to define the general omission fault model
             consisting of crash, send- and receive-omission
             failures. [to get]}
}



@Article{Tan:1986:CDT,
  author  = {Richard B. Tan and Gerard Tel and Jan {van Leeuwen}},
  title   = {Comments on {``Distributed termination detection
             algorithm for distributed computations''} ({Letter} to
             the {Editor})},
  journal = ipl,
  volume  = 23,
  pages   = {163},
  month   = oct,
  year    = 1986,
  annote  = {Notes an error in the algorithm of \cite{Arora:1986:DTD}.
             See also \cite{Arora:1988:MCD}.}
}


@InProceedings{Attiya:1987:ACA,
  author     = {Hagit Attiya and Amotz Bar-Noy and Danny Dolev and
                Daphne Koller and David Peleg and R{\"u}diger Reischuk},
  title      = {Achievable cases in an asynchronous environment},
  booktitle  = {Proceedings of the 28th annual Symposium on the
                Foundations of Computer Science},
  pages      = {337--346},
  publisher  = pub-IEEE-CSP,
  OPTaddress = pub-IEEE-CSP:adr,
  month      = oct,
  year       = {1987},
  annote     = {The authors consider several problems and show that
                they are achievable in asynchronous systems despite
                the fact that things like consensus
                aren't. Problems considered are renaming of
                processors to compact the name space and the
                ``multi-slot critical section problem'' (which is
                mutual exclusion for more than one processor).}
}

@Book{Bernstein:1987:CCR,
  author    = "P. Bernstein and V. Hadzilacos and N. Goodman",
  title     = "Concurrency Control and Recovery in Database Systems",
  publisher = pub-AW,
  year      = 1987,
  OPTannote = "H.2.5/Bern nicht am Ort"
}

@Article{Birman:1987:RCP,
  author  = {K. P. Birman and T. A. Joseph},
  title   = {Reliable Communication in the Presence of Failures},
  journal = {ACM Transactions on Computer Systems},
  volume  = {5},
  number  = {1},
  pages   = {47--76},
  month   = feb,
  year    = {1987},
  annote  = {First reference to causal order, the generalization of
             Lamport's happened-before \cite{Lamport:1978:TCO}.}
}


@Article{Brooks:1987:NSB,
  author  = {Frederick P. Brooks},
  title   = {No Silver Bullet},
  journal = j-IEEE-COMPUTER,
  volume  = {20},
  number  = {4},
  pages   = {10--19},
  month   = apr,
  year    = {1987},
  annote  = {A famous paper on the ``essence and accidents in
             software engineering''. Brooks explores reasons for
             the fact that despite high hopes and great claims
             the software industry and computer science academia
             have failed to produce really reliable, error-free
             products. Brooks discusses facts like complexity and
             psychological problems for people involved. He states
             that the problem is in its core rather a human than
             a technical issue.}
}



@Article{Dolev:1987:MSN,
  author  = {Danny Dolev and Cynthia Dwork and Larry Stockmeyer},
  title   = {On the minimal synchronism needed for distributed consensus},
  journal = j-ACM,
  volume  = {34},
  number  = {1},
  pages   = {77--97},
  month   = jan,
  year    = {1987},
  annote  = {This is a refinement work of the paper by Fischer, Lynch
             and Paterson \cite{Fischer:1985:IDC}. The consensus
             problem is investigated in various different system
             models. Critical parameters that emerge are: processors
             synchronous/asynchronous, communication
             synchronous/asynchronous, message order
             synchronous/asynchronous, broadcast transmission or
             point to point, atomic receive/send or separate
             receive/send. The minimal cases to achieve consensus
             are: (1) synchronous processors and synchronous
             communication, (2) synchronous processors and
             synchronous message order, (3) synchronous message
             order and broadcast communication, (4) synchronous
             communication, broadcast transmission, and atomic
             receive/send. The intuition behind the results is that
             the system shouldn't be able to ``hide a critical
             step''. Probabilistic algorithms are not investigated.
             The proofs in this paper are large and quite intrinsic.}
}

@Article{Jifeng:1987:ASP,
  author  = {He Jifeng and C. A. R. Hoare},
  title   = {Algebraic Specification and Proof of a Distributed
             Recovery Algorithm},
  journal = {Distributed Computing},
  volume  = {2},
  number  = {1},
  pages   = {1--12},
  year    = {1987},
  annote  = {A masking fault tolerant implementation to a crash-recover
  process is presented and proved using basics of CSP. Two different
  implementations are presented: one that simply replays and one that
  uses checkpoints. Faults are detected instantaneously and ``the only
  subtle point is to ensure the correct outcome even when [faults]
  occur in the middle of the recovery procedure.'' (p. 2) I didn't
  find this point though in the proof. The discussion makes a few good
  points and contributes to the overall quality of the paper: (1)
  instead of having a general purpose mechanism to prove any system,
  every application area probably will have its adopted calculus:
  ``Nevertheless, even for a grossly over-simplified problem, the
  algebraic calculations are non-trivial. This probably has to be
  accepted as inevitable in any serious application of mathematics to
  engineering. The calculations can be simplified by prior development
  of a calculus adapted more to the specific needs of a problem. It
  will be interesting to see how far such calculi are applicable to
  more general classes of problems; but it seems quite likely that they
  will not. Again, we may have to accept that each application will
  require derivation of specialized laws to control its complexity.''
  (page 9) (2) recovery with non-instantaneous fault detection can
  probably be based on logical time, (3) non-deterministic processes
  cannot use this type of recovery. A weakened specification is
  necessary.}
}

@Article{Joseph:1987:PRF,
  author  = {Mathai Joseph and Abha Moitra and Neelam Soundararajan},
  title   = {Proof Rules for Fault Tolerant Distributed Programs},
  journal = {Science of Computer Programming},
  volume  = {8},
  number  = {1},
  pages   = {43--67},
  month   = feb,
  year    = {1987},
  annote  = {The authors attempt to develop a set of rules to prove
    the correctness of CSP programs \cite{Hoare:1984:CSP} in faulty
    environments. The failure model is that of fail-stop, i.e. the
    authors assume detectable crash faults and recovery without
    stable storage. The method concentrates on partial correctness
    of terminating processes and also on the invariants of
    non-terminating processes (i.e. it concentrates on safety
    properties). The proof rules show how the interface (i.e.
    communication) behavior of processes is weakened by the failure
    model and how the behavior of the complete system can be
    obtained from the behaviors of the individual processes. The
    recovery-aspect of the failure model weakens the achievable
    safety property because of possible repetitions. But overall,
    the global invariant is the conjunction of the local invariants
    provided that the processes are ``compatible'' (meaning that
    their communication behavior matches). Channels are assumed to
    be reliable. Sect. 6 contains the first derivation rule of
    weaker safety properties that I know of. A bounded buffer is
    taken as an example. The future work section discusses general
    liveness properties and states that they are difficult to
    prove! The basic fault-tolerance methodology involved here is
    based on detection (which is assumed to be automatic) and
    correction through recovery actions. This is the basis of later
    work in this direction
    \cite{Peled:1994:CFF,Arora:1998:CDM,Arora:1998:DCT}.}
}

@Article{Mattern:1987:ADT,
  author  = {Friedemann Mattern},
  title   = {Algorithms for distributed termination detection},
  journal = j-DC,
  volume  = {2},
  number  = {3},
  pages   = {161--175},
  year    = {1987},
  annote  = {[to get]}
}


@Article{Moran:1987:EIR,
  author     = {Shlomo Moran and Yaron Wolfstahl},
  title      = {Extended impossibility results for asynchronous
                complete networks},
  journal    = {Information Processing Letters},
  journalabr = {Inf Process Lett},
  volume     = {26},
  number     = {3},
  pages      = {145--151},
  day        = {23},
  month      = nov,
  year       = {1987},
  affiliationaddress = {Technion Israel Inst of Technology, Haifa, Isr},
  keywords   = {asynchronous complete networks; computational
                complexity; computer fault tolerance; computer
                systems, digital; consensus problem; consensus task;
                decision graph; Distributed; distributed computation;
                distributed processing; fault tolerance; fault
                tolerant computing; graph theory; impossibility
                results; mathematical techniques --- Graph Theory;
                protocol; protocols; reliability; standardization;
                theory; unsolvability; verification},
  annote     = {[to read]}
}

@Book{Raynal:1987:NDC,
  author    = {Michel Raynal},
  title     = {Networks and Distributed Computation: Concepts, Tools,
               and Algorithms},
  publisher = {North Oxford Academic Publishers},
  address   = {London},
  year      = {1987},
  ISBN      = {0-946536-27-9},
  keywords  = {book, text, parallel processing, supercomputers},
  note      = {Original French language edition Syst{\`e}mes
               r{\'e}partis et r{\'e}seaux (1987), translated by Meg
               Sanders}
}

@Article{Srikanth:1987:OCS,
  author   = {T. K. Srikanth and Sam Toueg},
  title    = {Optimal Clock Synchronization},
  journal  = j-ACM,
  volume   = {34},
  number   = {3},
  pages    = {626--645},
  month    = jul,
  year     = {1987},
  url      = {http://www.acm.org/pubs/toc/Abstracts/0004-5411/28876.html},
  abstract = {We present a simple, efficient, and unified solution
              to the problems of synchronizing, initializing, and
              integrating clocks for systems with different types of
              failures: crash, omission, and arbitrary failures with
              and without message authentication. This is the first
              known solution that achieves optimal accuracy - the
              accuracy of synchronized clocks (with respect to real
              time) is as good as that specified for the underlying
              hardware clocks. The solution is also optimal with
              respect to the number of faulty processes that can be
              tolerated to achieve this accuracy.},
  keywords = {algorithms; Byzantine failures; computer programming
              --- Algorithms; computer systems, digital; Distributed;
              message authentication; optimal clock synchronization;
              reliability; synchronizing in presence of faults;
              theory; verification},
  annote   = {Assumes that drift rate is bounded on processes and
    that there is a maximum message delivery delay.  Gives tolerance
    specification of clock synchronization. Shows lower bound on
    accuracy dependent on the drift rate of clocks: the bound on the
    drift rate of logical clocks is at least as large as the bound of
    drift of the physical clocks (Theorem 2). They present an algorithm
    which reaches this bound.}
}

@Article{Apt:1988:AFL,
  author  = {Krzysztof R. Apt and Nissim Francez and Shmuel Katz},
  title   = {Appraising Fairness in Languages for Distributed
             Programming},
  journal = {Distributed Computing},
  volume  = {2},
  number  = {4},
  pages   = {226--241},
  year    = {1988},
  annote  = {A general formulation of fairness is: if a certain
    choice is possible infinitely often, then it is sufficiently
    often taken. Precise formulations depend on how `choice',
    `possible' and `sufficiently often' are defined. The authors
    propose three basic criteria which any sensible definition of
    fairness should have in any model: feasibility, equivalence
    robustness, and liveness enhancement. (a) Fairness usually rules
    out certain traces which would be acceptable in the given model
    of computation. Feasibility ensures that after ruling out unfair
    traces, still valid traces remain. More precisely, feasibility
    requires that for every point in a computation it should be
    possible to extend it to be a fair one. This is related to the
    notion of machine closure \cite{Lamport:2000:FAH}. (b)
    Equivalence robustness means that if a trace x is fair, then a
    trace y must also be fair where y results from x by resorting
    `independent' actions. (c) Liveness enhancement means that all
    distributed system models assume a fundamental liveness
    property, meaning for example that eventually the system will
    take a step if it is not deadlocked. A fairness definition must
    give `additional value' to such an assumption, i.e., there must
    be a program which has a liveness property only if the
    additional fairness requirement holds.}
}


@Article{Arora:1988:MCD,
  author  = {Rada Krishan Arora and M. N. Gupta},
  title   = {More comments on {``Distributed termination detection
             algorithm for distributed computations''} ({Letter} to
             the {Editor})},
  journal = ipl,
  volume  = 29,
  pages   = {53--55},
  month   = sep,
  year    = 1988,
  annote  = {See also \cite{Arora:1986:DTD,Tan:1986:CDT}. Tries to fix
             the error.}
}


@Book{Chandy:1988:PPD,
  author    = {K. Mani Chandy and Jayadev Misra},
  title     = {Parallel Program Design: {A} Foundation},
  publisher = pub-AW,
  address   = {Reading, Mass.},
  year      = {1988},
  annote    = {[to read]}
}

@Article{Dwork:1988:CPP,
  author  = {Cynthia Dwork and Nancy Lynch and Larry Stockmeyer},
  title   = {Consensus in the presence of partial synchrony},
  journal = j-ACM,
  volume  = {35},
  number  = {2},
  pages   = {288--323},
  month   = apr,
  year    = {1988},
  annote  = {The authors study practically motivated models of
             synchrony that lie between fully asynchronous and
             fully synchronous systems in which consensus shall
             be achieved. The models of partial synchrony studied
             include: (1) upper bounds on processor speeds and
             message latency exist but are unknown, and (2) upper
             bounds exist and are known, but only hold after
             some unknown time (eventually). In both cases
             consensus with different resiliency can be achieved.}
}

@InProceedings{Fidge:1988:TMP,
  author    = "Colin J. Fidge",
  title     = "Timestamps in message-passing systems that preserve
               partial ordering",
  booktitle = "Proceedings of the 11th Australian Computer Science
               Conference",
  pages     = "56--66",
  month     = feb,
  year      = 1988
}


@InProceedings{Haban:1988:GEG,
  author    = {Dieter Haban and Wolfgang Weigel},
  title     = {Global Events and Global Breakpoints in Distributed
               Systems},
  booktitle = {Proceedings of the Twenty-First Annual Hawaii
               International Conference on System Sciences},
  editor    = {Bruce D. Shriver},
  volume    = {II (Software Track)},
  pages     = {166--175},
  publisher = pub-IEEE,
  month     = jan,
  year      = {1988},
  annote    = {[to read]}
}

@InProceedings{Herlihy:1988:RAO,
  author    = {Maurice P. Herlihy and Jeannette M. Wing},
  title     = {Reasoning about Atomic Objects},
  booktitle = {Proceedings of the Symposium on Formal Techniques in
               Real-Time and Fault-Tolerant Systems},
  editor    = {M. Joseph},
  series    = ser-LNCS,
  volume    = {331},
  pages     = {193--208},
  publisher = pub-SV,
  month     = sep,
  year      = {1988},
  ISBN      = {3-540-50302-1},
  annote    = {formal proof method for fault tolerant programs, to read}
}

@Book{Isermann:1988:DRS,
  author    = "Rolf Isermann",
  title     = "{Digitale Regelsysteme, Band I (in German)}",
  publisher = pub-SV,
  year      = 1988,
  annote    = "[to read]"
}

@Article{Kessels:1988:EPS,
  author  = {J. L. W. Kessels},
  title   = {An exercise in proving self-stabilization with a
             variant function},
  journal = j-IPL,
  volume  = {29},
  pages   = {39--42},
  year    = {1988},
  annote  = {Correctness proof of Dijkstra's 3-state mutual exclusion
             protocol \cite{Dijkstra:1974:SSS} using a bound
             function. It shows the general technique of proving
             convergence by a variant function and also exposes the
             intrinsic dangers and difficulties of this method.}
}





@Article{Knuth:1988:SDS,
  author  = "T. Knuth",
  title   = "{Schadenfr\"uherkennung durch Schwingungsanalysen --- Neue
             M\"oglichkeiten in der Instandhaltung}",
  journal = "Der Maschinenschaden",
  volume  = 61,
  pages   = "70--74",
  year    = 1988,
  annote  = "[Angabe von Armin]"
}

@InCollection{Mancini:1988:TTR,
  author    = {Luigi V. Mancini and Giuseppe Pappalardo},
  title     = {Towards a theory of replicated processing},
  booktitle = {Formal techniques in real-time and fault-tolerant
               systems},
  editor    = {M. Joseph},
  series    = ser-LNCS,
  volume    = {331},
  publisher = pub-SV,
  year      = {1988},
  annote    = {specification approach [to read]}
}

@InProceedings{Mattern:1988:VTG-CITE-1989-VERSION,
  author    = "Friedemann Mattern",
  title     = "Virtual time and global states of distributed systems",
  booktitle = "Proceedings of the International Workshop on Parallel
               and Distributed Algorithms",
  editor    = "M. Cosnard",
  publisher = "Elsevier",
  address   = "Chateau de Bonas, France",
  OPTmonth  = oct,
  year      = 1988,
  annote    = "Reprinted somewhere, but where?"
}


@InProceedings{Miller:1988:BHD,
  author    = {Barton P. Miller and Jong-Deok Choi},
  title     = {Breakpoints and halting in distributed programs},
  booktitle = {Proceedings of the 8th International Conference on
               Distributed Computing Systems},
  pages     = {316--323},
  year      = {1988},
  annote    = {The authors address the problem of distributed
    debugging by formally defining distributed predicates and giving
    algorithms to detect such predicates and to halt a distributed
    computation in a consistent state once the predicate is
    satisfied. Predicates are either (1) simple predicates (defined
    over the state of a single process), (2) disjunctions of simple
    predicates, (3) linked predicates (a sequence of events ordered
    by the causality relation) and (4) conjunctions of simple
    predicates. Types (1) and (2) are easily detectable by detection
    modules within processes. Type (3) is detected by tracking event
    occurrences on the processes involved in each item on the
    predicate chain and following causal dependencies across
    channels with markers. Type (4) needs a definition of its
    semantics since there is no single notion of time: $A\land B$ is
    true whenever $A$ becomes true on one process and $B$
    subsequently becomes true on another process and $B$ causally
    depends on $A$. If no causal relationship exists between $A$ and
    $B$, then a central observer is used to detect $A\land B$. An
    algorithm to detect these predicates and to halt the algorithm
    is given based on the Chandy/Lamport algorithm to observe
    computations (requires FIFO channels, but asynchronous system).
    The paper describes problems related to distributed debugging.
    The detection of conjunctions implicitly defines
    ``possibly($A\land B$)'' without stating how to detect this in
    all cases in a distributed fashion. But this is okay, since this
    paper rather aims at detecting dynamic properties, and possibly
    is a static property. See also a good continuation of this work
    by Babaoglu et al. \cite{Babaoglu:1996:UFS}.}
}

@InProceedings{Patterson:1988:CRA,
  author     = {David A. Patterson and Garth Gibson and Randy H.
                Katz},
  title      = {A {Case} for {Redundant} {Arrays} of {Inexpensive}
                {Disks} ({RAID})},
  booktitle  = {Proceedings of the ACM Conference on Management
                of Data (SIGMOD)},
  pages      = {109--116},
  OPTaddress = {Chicago, IL},
  month      = jun,
  year       = {1988},
  abstract   = {As processor and memory speeds increase at an
                exponential rate and single disk access times remain
                relatively constant, it is apparent that I/O bandwidth
                is likely to become a bottleneck in the performance of
                systems. One way to address this problem is by using
                disk arrays, i.e., sets of relatively inexpensive disks
                which can improve I/O bandwidth via parallel access.
                The problem with this approach is that simply using
                disk arrays can drastically reduce reliability. The
                approach of RAID is to use redundant disks of check
                data to bring reliability up to acceptable levels
                (i.e., failure rates better than expected useful life
                of the disks). Five levels of the RAID design are
                presented to address the issues of overhead cost (in
                terms of number of disks), useable storage capacity,
                and efficiency per disk for various read and write
                scenarios (i.e., large vs. small). These issues were
                considered in terms of {\em data rates} (supercomputer
                applications) and {\em I/O rates} (transaction
                systems). Level 5 RAID provides the best all around
                performance by distributing check data across the data
                disks to increase parallelism.},
  annote     = {to read}
}

@Book{Raynal:1988:DAP,
  author    = {Michel Raynal},
  title     = {Distributed Algorithms and Protocols},
  series    = {Wiley Series in Computing},
  pages     = {163},
  publisher = {John Wiley \& Sons},
  address   = {Chichester, England},
  year      = {1988},
  ISBN      = {0-471-91754-0},
  keywords  = {book, text, parallel processing, supercomputers,
               electronic data processing -- distributed processing,
               algorithms, computer network protocols},
  abstract  = {More theoretical book on the fundamental problems in
               distributed systems and some solutions. 1st. English
               issue 1988 (the French version was published in
               1985).\par ** Description ** The use of distributed
               algorithms offers the prospect of great advances in
               computing speed. This book provides a clear, practical,
               and up-to-date guide to distributed algorithms and
               protocols in the area of control. Much of the material
               has been heretofore unavailable in English. Each
               chapter considers a specific aspect of control, with an
               analysis of the problem, a description of the algorithm
               for solving it, and proofs of correctness. Chapters can
               be studied independently to find solutions to
               particular problems.\par ** Contents ** Introduction to
               Distributed Algorithms. Election and Mutual Exclusion
               Algorithms. Algorithms for Detection and Resolution of
               Deadlock. Algorithms for Detecting Termination.
               Protocols for Data Transfer. Management of Distributed
               Data. Problems of Gaining Consensus in the Presence of
               Uncertainties (or How to Avoid Byzantine Quarrels).
               References.},
  note      = {Algorithmes distribu{\'e}s et protocoles, translated by
               Jack Howlett}
}

@InProceedings{Abadi:1989:RUS,
  author    = {Mart{\'\i}n Abadi and Leslie Lamport and Pierre Wolper},
  title     = {Realizable and unrealizable specifications of
               reactive systems},
  booktitle = {Automata, Languages and Programming. 16th
               Int.~Colloquium Proceedings},
  editor    = {G. Ausiello and M. Dezani-Ciancaglini and S. Ronchi
               Della Rocca},
  series    = ser-LNCS,
  volume    = {372},
  pages     = {1--17},
  publisher = pub-SV,
  address   = {Stresa, Italy},
  month     = jul,
  year      = {1989},
  annote    = {A specification is a formula $E\Rightarrow M$ where $E$ is
    an assumption about the environment and $M$ is a property
    guaranteed by the system (this way of viewing specifications is
    described in
    \cite{Lamport:1989:SAS,Abadi:1993:CS}). Specifications can become
    unrealizable if $E$ asserts some property of the environment
    because this part of the universe is totally outside the control
    of the implementor. Thus, a specification is unrealizable if it
    constrains the environment. This paper studies the exact
    definitions and conditions of realizability. The first approach is
    to define a simple computer and base the definition of
    realizability on the fact that a specification can be implemented
    on such a device. On the other hand, it views a specification as
    the rules for a two-player infinite game where environment and
    system both take turns and try to win. The environment wins if it
    can produce unspecified executions. Otherwise the system wins. It
    turns out that a specification is realizable if the system has a
    winning strategy. Realizability of a specification is a different
    notion than consistency (i.e., whether the set of infinite
    behaviors of the system is nonempty). This paper is very
    theoretical and uses a lot of terminology and concepts that I am
    not familiar with (B\"uchi automata, Borel sets, etc).  The ideas
    of game-playing and specifications appear again in
    \cite{Abadi:1993:CS} in an at least to me more understandable
    fashion. }
}

@InCollection{Chor:1989:RBA,
  author    = {Benny Chor and Cynthia Dwork},
  title     = {Randomization in {Byzantine} Agreement},
  booktitle = {Advances in Computing Research 5: Randomness and
               Computation},
  pages     = {443--497},
  publisher = {JAI Press},
  year      = {1989},
  OPTnote   = {A useful survey of the myriad of randomized
               distributed algorithms for Byzantine agreement.},
  annote    = {[to read]}
}



@Article{Cristian:1989:PCS,
  author  = {Flaviu Cristian},
  title   = {Probabilistic clock synchronization},
  journal = j-DC,
  volume  = {3},
  pages   = {146--158},
  year    = {1989},
  annote  = {A very well-written introduction into clock synchronization
     in ``real'' systems, and a good starting point for a lecture on this
     topic.  Assumes that there is a maximum drift rate but there is no
     maximum message delivery delay.  In this setting, clock synchronization
     can only be achieved in a probabilistic manner and Cristian well explains
     the inherent tradeoffs. Mentions that modern quartz clocks have a
     drift rate of the order $10^{-6}$, messages have some minimum time
     to travel but the distribution of delivery times (while usually being
     close to the minimum) is arbitrary. Nodes and messages can only suffer
     performance failures.  In a nice exposition, it is explained how
     a node reads another node's clock and within what bounds the reading
     is as well as the error. (The precision of the reading is better the
     shorter the round trip time of the reading was.) Fixing the error
     results in a maximum time which a node is willing to wait for a
     result. There is a fundamental trade-off between the precision of
     the reading and the probability of success.  Other algorithms
     like \cite{Srikanth:1987:OCS,Dolev:1995:DFC} are deterministic,
     i.e. they always reach a result but have poor precision. There
     is a continuum of probabilistic algorithms between the bounds of
     setting the maximum waiting time. Setting it close to the
     minimum is ``aggressive'' and will get good results with low
     probability. The other extreme are deterministic algorithms. Cristian
     also sketches implementations of time services and gives real-world
     numbers to instantiate the formulas given. It shows that
     synchronization within milliseconds is achievable. Overall,
     one of my top-ten favourite papers. }
}

@InProceedings{Gopal:1989:RBS,
  author    = {A. Gopal and S. Toueg},
  title     = {Reliable Broadcast in Synchronous and Asynchronous
               Environments},
  booktitle = pro-wdag89,
  pages     = {110--123},
  address   = {Nice, France},
  year      = {1989},
  annote    = {[to read]}
}

@InProceedings{Gray:1989:LEF,
  author =       "Cary G. Gray and David R. Cheriton",
  title =        "Leases: {An} efficient fault-tolerant mechanism for
                 distributed file cache consistency",
  booktitle =    "Proceedings of the 12th ACM Symposium on Operating
                 System Principles",
  conflocation = "Litchfield Park, AZ, 3--6 December 1989",
  journal =      "Operating Systems Review",
  volume =       "23",
  number =       "5",
  year =         "1989",
  month =        dec,
  pages =        "202--210",
  key =          "Gray89",
  keywords =     "Gray89 time-based distributed coherency, distributed
                 file systems, V performance, lease",
  abstract =     "Caching introduces the overhead and complexity of
                 ensuring consistency, reducing some of its performance
                 benefits. In a distributed system, caching must deal
                 with the additional complications of communication and
                 host failures. {\em Leases} are proposed as a
                 time-based mechanism that provides efficient consistent
                 access to cached data in distributed systems.
                 Non-Byzantine failures affect performance, not
                 correctness, with their effect minimized by short
                 leases. An analytic model and an evaluation for file
                 access in the V system show that leases of short
                 duration provide good performance. The impact of leases
                 on performance grows more significant in systems of
                 larger scale and higher processor performance.",
  annote =       "A lease is a contract that gives its holder specified
    rights over an object for a limited period of time. In the case
    where file cache consistency is to be maintained, a cache must
    obtain a lease for an object when the application accesses that
    object. A lease implicitly contains a lease term (duration) which
    describes its validity over time. Only with a valid lease a cache
    is allowed to answer read requests for that object.  If the cache
    is requested to update an object, the cache must obtain a lease
    (if it doesn't have one already) and must then obtain approval by
    all other leaseholders for the write. When granting approval,
    leaseholders give up their lease. Here, fault tolerance comes
    into play: a client wanting to update an object must wait either
    until it has an approval of all leaseholders or until all of
    their leases have expired. (To prevent starvation, no new leases
    for an object are granted during this waiting time.) This can
    effectively help combat non-Byzantine faults in the system. Leases
    can introduce \emph{false sharing}, i.e. lease conflicts where no
    actual write conflicts exist, for example if another client cache
    has obtained a lease but has stopped using the object long before
    the lease has expired. For this, short lease terms are good.
    Short lease terms also minimize the delay caused by network
    partitions and client crashes (this is analogous to short
    aggressive time-outs in failure detection).  Long term leases
    have the advantage if objects are accessed repeatedly by the
    same client and there is little write sharing. Analytical and
    experimental results are presented, stating that lease terms
    of 5--10 seconds in the V system are quite good, based on 
    read and write rates between 0.03 (writes) and 0.8 (reads) 
    per second, message propagation of 1 msec, message processing
    time of 0.25 msec and maximum clock skew of 100 msec. These
    simulations however do not refer to fault tolerance issues.
    The leases mechanism is dependent on synchronized clocks. A
    minimum assumption is that clocks have a known bounded drift rate.
    In this case, leases can be simply communicated using their
    duration.  Server clocks that advance too quickly and client
    clocks which are too slow are problematic and can cause errors
    while the opposite (e.g. slow server clocks etc.) simply cause
    more message traffic. The conclusions contain a good cite which
    is in the spirit of Cristian and Fetzer's timed model
    \cite{Cristian:1999:TAD}: ``The lease approach is an example of a
    communication and coordination mechanism and reasoning based on
    (real) time, the availability of clocks that measure the passage
    of time with modest accuracy, and the ability to draw conclusions
    after a passage of time, possibly in the absence of communication. 
    [...] We see this use of time as a fundamental aspect of distributed
    systems with potential for significant extension beyond that
    described here.'' "
}

@Article{Halpern:1989:MKA,
  author =       {Joseph Y. Halpern and Ronald Fagin},
  title =        {Modelling knowledge and action in distributed systems},
  journal =      j-DC,
  year =         {1989},
  OPTkey =       {},
  volume =       {3},
  OPTnumber =    {},
  OPTmonth =     {},
  pages =        {159--177},
  OPTnote =      {},
  OPTannote =    "[to do]"
}



@Article{Lamport:1989:SAS,
  author =       {Leslie Lamport},
  title =        {A simple approach to specifying concurrent systems},
  journal =      j-CACM,
  year =         {1989},
  OPTkey =       {},
  volume =       {32},
  number =       {1},
  pages =        {32--45},
  month =        jan,
  OPTnote =      {},
  annote = {An amusing but still challenging paper on formal
    specifications of concurrent programs. Lamport informally presents
    the ``transition axiom method'' which is described in detail in
    \cite{Lamport:1983:SCP}. A system is a `thing' that interacts with
    its environment through a well-defined interface. The system
    properties in question are described as safety and liveness, which
    capture the essence of system behavior relevant to the
    author. (There are system properties not expressible as safety and
    liveness, some are given, confer also \cite{Rushby:1994:CSP}.) 
    Safety properties are discussed first: A simple soda vending
    machine with three (specification) states and four (specification)
    state transitions is taken as an example. The essence of Lamport's
    specification method is to say which state transitions are allowed
    and which ones aren't. A system may have some unspecified state
    set $S$, and a specification can be viewed as a restriction on
    some state function $f$ from $S$ to the set of specification
    states. The machine's behavior is a sequence of states
    $s_0,s_1,\ldots$ from $S$. A programmer wishing to implement the
    specification must find such a state function $f$ which changes
    its state according to the specification and some interface
    actions. Finding such a function is like proving that the
    implementation is correct regarding the specification. A
    specification must also always contain a description of the
    interface of the system in question. This description is naturally
    at an implementation level. The formula underlying a transition
    axiom specification is a temporal logic formula of the form
    $\exists f_1,\ldots,f_n$ for which
    $X(f_1,\ldots,f_n,g_1,\ldots,g_m)$. Here, $f_i$ are internal state
    functions and $g_i$ are state functions of the interface. The
    existential quantification over $f_i$ signifies the freedom of
    implementation. The fact that the $g_i$ are free variables means
    that they must appear in the implementation (i.e., are in fact
    implementation level). The internal states which are implied by a
    transition axiom specification constrain the implementation a bit;
    formalisms that do not constrain the implementation (like pure
    temporal logic) are however not more general than transition
    axioms. In fact, sometimes it's good to give some hints to an
    implementation. (However, a specification still should concentrate
    only on the externally visible behavior. Mechanisms not using
    additional state variables tend to be very complex.) The approach
    to write specifications then is to (1) choose a set of states (and
    thus state functions), (2) specify how they are allowed to change
    (these are the transition axioms), and (3) specify when they must
    change. Transition axioms are safety requirements, part 3
    specifies liveness requirements. Liveness requirements are written
    in temporal logic. A specification can be separated into safety
    and liveness, thus separating the transition axioms from the
    temporal logic part. Showing that an implementation satisfies a
    specification, one shows that the system's safety implies the
    safety specification and then that the system's safety and
    liveness imply the liveness specification. The system's safety and
    liveness are given by the implementation, which is a kind of lower
    level specification.  The paper is written in a question/answer
    style which is very amusing. A rewarding paper.}
}

@InProceedings{Mattern:1989:VTG,
  author =       "Friedemann Mattern",
  title =        "Virtual time and global states of distributed
                 systems",
  booktitle =    "Proceedings of the International Workshop on Parallel
                 and Distributed Algorithms",
  editor =       "M. Cosnard and others",
  publisher =    "Elsevier Science Publishers",
  address =      "Chateau de Bonas, France",
  year =         "1989",
  pages =        "215--226",
  note =         "Reprinted on pages 123--133 in \cite{Yang:1994:GST}.",
  annote =       "Classic on vector time, consistent global states etc."
}

@PhdThesis{Michel:1989:KDB,
  author = {Ruben Michel},
  title  = {Knowledge in distributed {B}yzantine environments},
  year   = {1989},
  school = {Yale University},
  annote = {requested from yale tech reports}
}

@Misc{Mills:1989:MPN,
  author       = "David L. Mills",
  title        = "Measured performance of the Network Time Protocol in
                  the Internet system",
  howpublished = "Internet Request for Comments RFC 1128",
  month        = oct,
  year         = "1989",
  OPTkey       = "",
  OPTnote      = "",
  OPTannote    = "to read"
}

@Article{Rabin:1989:EDI,
  author   = "Michael O. Rabin",
  title    = "Efficient dispersal of information for security, load
              balancing, and fault tolerance",
  journal  = "Journal of the ACM",
  volume   = "36",
  number   = "2",
  pages    = "335--348",
  year     = "1989",
  OPTmonth = apr,
  OPTkey   = "",
  OPTnote  = "",
  annote   = "This is maybe a relation between security, fault
    tolerance and redundancy? Uses a scheme of information sharing
    to make information secure and available."
}


@Book{Rao:1989:ECC,
  author     = "T. R. N. Rao and E. Fujiwara",
  title      = "Error-control coding for computer systems",
  publisher  = "Prentice-Hall",
  year       = "1989",
  OPTkey     = "",
  OPTvolume  = "",
  OPTnumber  = "",
  OPTseries  = "",
  OPTaddress = "",
  OPTedition = "",
  OPTmonth   = "",
  OPTnote    = "",
  annote     = "My standard cite for error control and detection codes."
}


@Article{Venkatesan:1989:RPD,
  author =       {Subbarayan Venkatesan},
  title =        {Reliable protocols for distributed termination detection},
  journal =      {IEEE Transactions on Reliability},
  year =         1989,
  OPTkey =       {},
  volume =       38,
  number =       1,
  pages =        {103--110},
  month =        apr,
  OPTnote =      {},
  annote =       {Venky's Homepage: \url{http://www.utdallas.edu/~venky/}
   The paper looks at distributed termination detection in asynchronous 
   systems with crash failures. It assumes that with $k$ failures the
   network stays connected and that channels are FIFO. States that 
   termination detection is at least as hard as consensus and thus impossible
   in the given context, so it assumes what we today call a perfect
   failure detector. The presented protocol is based on a termination
   detection scheme built for fault free systems. If there can be $k$ 
   failures, the protocol elects $k+1$ leaders which replicate the
   state information of all other nodes. In case of a failure, the
   termination detection protocol is aborted and a new round is started.
   In this round, the leaders simulate the behavior of the crashed nodes.
   References a fault tolerant snapshot protocol by Shah and Toueg
   which seems to be only available as a Cornell TR \cite{Shah:1984:DSS}.}
}

@InProceedings{Weber:1989:FSF,
  author =       "D. G. Weber",
  title =        "Formal Specification of Fault-Tolerance and Its
                 Relation to Computer Security",
  pages =        "273--277",
  ISBN =         "0-8186-1942-2",
  editor =       "Sol Greenspan",
  booktitle =    "Proceedings of the 5th International Workshop on
                 Software Specification and Design",
  address =      "Pittsburgh, PA",
  month =        may,
  year =         "1989",
  publisher =    "IEEE Computer Society Press",
  annote = "A neat and high level description of how fault-tolerance
  in its different forms can be specified at the system interface. A
  system is identified with its set of traces. A fault scenario is a
  precise description of how the components are doomed to fail (this
  is nowadays called the fault assumption). MTTF can be calculated by
  averaging over all fault scenarios. A system $D$ has a
  fault-tolerant version $FTD$, and let $N$ be a set of fault
  scenarios where no faults occur (fault-free fault assumption) and
  $C$ be a set of fault scenarios under which we desire fault
  tolerance. Proving fault tolerance can be now done in three ways:
  (1) show that the behavior of $D$ under $N$ is identical to the
  behavior of $FTD$ under $C$, (2) characterize the behavior of $D$
  under $N$ by some specification $S$ and show that $FTD$ under $C$
  implements $S$, or (3) show that the behavior of $FTD$ under $N$ is
  identical to the behavior of $FTD$ under $C$. The third method is
  taken as the basis for a definition of fault tolerance: A system is
  fault tolerant if for all its behaviors under $C$ there is an
  equivalent behavior under $N$. This definition can be weakened by
  redefining `equivalent' to mean `acceptably equivalent' regarding
  some equivalence relation on traces. This can also model
  graceful degradation (as is done in \cite{Herlihy:1991:SGD}). The
  author indicates that there are close resemblances to computer
  security specifications: highly sensitive events are analogous to
  faults as they should not show up on lower levels (i.e. to
  unauthorized users). Overall a short and concise paper, one of the
  earliest using this formal view, although I don't understand the
  differences between (1), (2) and (3) above.  Referenced in
  \cite{Schepers:1993:TFT} as a similar approach as
  \cite{Joseph:1987:PRF} (expliziter Fehlermodellierung). Generally, 
  security properties are probably higher order properties 
  \cite{McLean:1994:GTC}. Not cited in \cite{Herlihy:1991:SGD}." 
}

@Article{Ben-Or:1990:FPS,
  author =       "Michael {Ben-Or} and Oded Goldreich and Silvio Micali
                 and Ronald L. Rivest",
  title =        "A Fair Protocol for Signing Contracts",
  number =       "1",
  journal =      "IEEE Transactions on Information Theory",
  volume =       "36",
  pages =        "40--46",
  year =         "1990",
  month =        jan,
  annote = "The authors present a neat fair exchange protocol which
  works as follows: two parties $A$ and $B$ exchange in rounds signed
  statements of the form ``with probability $p$ the agreed-upon
  contract is valid for me'' ($p$ is different for messages signed by
  $A$ or by $B$). Both parties start with $p=0$ and independently
  decide how to increase their $p$. In the effective case, eventually
  both will receive a statement of the form ``with probability 1 the
  contract is valid for me''.  In the non-effective case, one party
  (say $A$) can turn to a judge and present to it the last message it
  received from $B$. The judge throws a dice and decides with
  probability $p_B$ whether the contract holds for both or not. If it
  holds, $B$ must obey the contract too. If it does not hold, the
  contract is refuted. The judge must be able to recollect every
  verdict (and thus usually store the value together with the contract
  [a method is given how this can be circumvented]). Overall, this is
  a very interesting and well-written paper keeping mathematics
  small. The protocol can be seen as a very general and clever gradual
  exchange protocol which can also be applied if the to be exchanged
  item is not infinitely splittable. It is optimistic since the judge
  is only needed in failed cases. The paper is also interesting since
  it reviews some fairness definitions regarding gradual exchange
  (computational vs. probabilistic) and thus comes close to the
  formalization of strong fairness of \cite{Gaertner:1999:AFD}. Also,
  an impossibility result concerning two-party exchange is cited
  \cite{Even:1980:RAP} which is difficult to get, but relevant for
  \cite{Pagnia:1999:IFE}."
}

@Article{Biran:1990:CCD,
  author  = {Ofer Biran and Shlomo Moran and Shmuel Zaks},
  title   = {A Combinatorial Characterization of the Distributed
             1-Solvable Tasks},
  journal = {Journal of Algorithms},
  volume  = {11},
  number  = {3},
  pages   = {420--440},
  month   = sep,
  year    = {1990},
  annote  = {[to read] extends \cite{Fischer:1985:IDC}.}
}

@Article{Buerk:1990:VES,
  author =       "Holger B{\"u}rk and Andreas Pfitzmann",
  title =        "Value Exchange Systems Enabling Security and
                 Unobservability",
  keywords =     "digital money, TTP, payment, pseudonyms, ware,
                 complaint period",
  journal =      "Computers \& Security",
  year =         "1990",
  annote =       "[haven't got a copy, annote written by someone else:]
                 two approaches to overcome the problem of simultaneity
                 in value exchange. both based on digital signatures
                 (pseudonyms/one-show credentials) certified by TTP: 1.)
                 passive TTP: - mutual authentication using pseudonyms X
                 <-> Y - signing of agreement X <-> Y - money X -> Y -
                 ware Y -> X, or complaint X -> TTP, TTP checks
                 agreement, asks Y to deliver again and passes ware to X
                 or identifies Y (-> court) 2.) active (intermediary)
                 TTP: - X,Y,TTP make agreement (to protect from TTP) -
                 money X -> TTP (as money can not be copied this has to
                 be done before ware to protect from faulty TTP) - ware
                 Y -> TTP (after receiving ``money-commit'') - money ->
                 Y, ware -> X (after check of ware) abortion after
                 money-transfer requires signed cancelation by TTP
                 and/or proof of payment. how handling
                 interactive/non-transferable payments. question of
                 quality of service have to be solved outside the system by
                 a court. (good paper, not too formal)",
  number =       "8",
  pages =        "715--721",
  volume =       "9"
}


@Article{Champine:1990:PAD,
  author =       "George A. Champine and Geer, Jr., Daniel E. and
                 William N. Ruh",
  title =        "{Project Athena} as a Distributed Computer System",
  journal =      j-IEEE-COMPUTER,
  volume =       "23",
  number =       "9",
  pages =        "40--51",
  month =        sep,
  year =         "1990",
  abstract =     "Now providing 10,000 students and faculty with a
                 variety of network services, MIT's educational
                 workstation system is designed to grow to 10 times its
                 present size.",
  annote =       "[to read]"
}

@InProceedings{Chaudhuri:1990:AHC,
  author =       "Soma Chaudhuri",
  title =        "Agreement is harder than consensus: set consensus
                  problems in totally asynchronous systems",
  OPTcrossref =  "",
  OPTkey =       "",
  OPTeditor =    "",
  OPTvolume =    "",
  OPTnumber =    "",
  OPTseries =    "",
  pages =        "311--324",
  booktitle = "Proceedings of Principles of Distributed Computing 1990",
  year =         "1990",
  OPTorganization = "",
  OPTpublisher = "",
  OPTaddress =   "",
  OPTmonth =     "",
  OPTnote =      "",
  annote =       "The author investigates the boundary between
                  possibility and impossibility of solutions to
                  problems in asynchronous systems. The problems
                  investigated are $k$-set consensus problems, where
                  the agreed upon set of values has size at most
                  $k$. It is shown that the $m$-resiliency is in
                  relation to the size $k$ of the set. This is another
                  paper exploring the border to impossibility after
                  the FLP result \cite{Fischer:1985:IDC} in the lines
                  of \cite{Attiya:1987:ACA,Dolev:1987:MSN} and
                  \cite{Dwork:1988:CPP}. A subsequent version appeared
                  in Information and Computation, 105 (1), 1993,
                  pp. 132--158."
}



@Article{Dwork:1990:KCK,
  author  = {Cynthia Dwork and Yoram Moses},
  title   = {Knowledge and Common Knowledge in a {B}yzantine
             Environment: Crash Failures},
  journal = {Information and Computation},
  volume  = {88},
  number  = {2},
  pages   = {156--186},
  year    = {1990},
  topic   = {epistemic-logic;mutual-belief;Byzantine-agreement;},
  annote  = {[to read]}
}


@InCollection{Emerson:1990:TML,
  author      = "E. Allen Emerson",
  title       = "Temporal and Modal Logic",
  booktitle   = "Handbook of Theoretical Computer Science",
  editor      = "Jan van Leeuwen",
  volume      = "B",
  chapter     = "16",
  pages       = "997--1072",
  publisher   = "Elsevier",
  year        = "1990",
  OPTcrossref = "",
  OPTkey      = "",
  OPTnumber   = "",
  OPTseries   = "",
  OPTtype     = "",
  OPTaddress  = "",
  OPTedition  = "",
  OPTmonth    = "",
  OPTnote     = "",
  annote      = "Brilliant introduction into the zoo of temporal logics."
}

@InProceedings{Gopal:1990:SFB,
  author    = {A. Gopal and S. Toueg},
  title     = {On the Specification of Fault-Tolerant Broadcast},
  booktitle = {Proc. Int. Workshop on Future Trends of Distributed
               Computing Systems},
  publisher = {IEEE Computer Society Press},
  address   = {Cairo, Egypt},
  pages     = {54--56},
  year      = {1990},
  annote    = {[to read]}
}

@Article{Gouda:1990:SU,
  author  = {Mohamed G. Gouda and Ted Herman},
  title   = {Stabilizing unison},
  journal = j-IPL,
  year    = 1990,
  volume  = 35,
  pages   = {171--175},
  annote  = {A short paper in the lines of \cite{Arora:1991:MDC}.}
}

@InProceedings{Gronning:1990:SDD,
  title =        "Stepwise Development of a Distributed Load Balancing
                 Algorithm",
  author =       "Peter Gr{\o}nning and Thomas Qvist Nielsen and Hans
                 Henrik L{\o}vengreen",
  booktitle =    pro-wdag90,
  editor =       "Jan van Leeuwen and Nicola Santoro",
  year =         "1990",
  series =       ser-LNCS,
  volume =       "486",
  ISBN =         "3-540-54099-7",
  pages =        "151--168",
  annote =       "Abstract problem statement like in
                  \cite{Arora:1995:ECC}. Formal definition of globally
                  $k$-balanced, locally $k$-balanced. Resulting system is
                  only locally balanced by simple local exchanges of
                  one load unit at a time. No global balancing wanted,
                  but a very broad sense of global balance achieved
                  (depending on the diameter of the network). The
                  abstract algorithm is transformed into a message passing
                  environment. Explicit reference to
                  self-stabilization of Dijkstra and hints to papers
                  on stepwise development out of specifications."
}


@InProceedings{Halpern:1990:CEB,
  author =       "Joseph Halpern and Yoram Moses and Orli Waarts",
  title =        "A Characterization of Eventual Byzantine Agreement",
  pages =        "333--346",
  ISBN =         "0-89791-404-X",
  editor =       "Cynthia Dwork",
  booktitle =    "Proceedings of the 9th Annual {ACM} Symposium on
                 Principles of Distributed Computing",
  address =      "Qu{\'e}bec City, Qu{\'e}bec, Canada",
  month =        aug,
  year =         "1990",
  publisher =    "ACM Press",
  annote =       "The authors show that while common knowledge is
                  sufficient to achieve simultaneous Byzantine
                  agreement, eventual Byzantine agreement (EBA) is
                  equivalent to achieving continual common knowledge,
                  a variant of common knowledge. They give a brief
                  introduction into the knowledge formalism, define
                  and characterize continual common knowledge and show
                  how to construct optimal EBA protocols with a
                  certain technique. The fault class under
                  consideration comprises omission and crash
                  faults. The conclusions state that results can be
                  extended to Byzantine faults, asynchronous systems
                  and general coordination problems. Overall a very
                  concise and brief-up-to-the-bare-minimum paper."
}

@Article{Halpern:1990:KCK,
  author =       "Joseph Y. Halpern and Yoram Moses",
  title =        "Knowledge and common knowledge in a distributed
                  environment",
  OPTcrossref =  "",
  OPTkey =       "",
  journal =      j-ACM,
  year =         "1990",
  volume =       "37",
  number =       "3",
  pages =        "549--587",
  month =        jul,
  OPTnote =      "",
  annote =       "A brilliant paper on the role of ``knowledge'' in
                  distributed systems. The authors define different
                  notions of knowledge (as opposed to belief) and
                  emphasize the small differences by amusing and
                  instructive examples. The different notions of
                  knowledge are: distributed knowledge of x of a group
                  G (someone who knows everything that people in G
                  know knows x), ``someone'' knowledge, ``everyone''
                  knowledge, and common knowledge. These form a strict
                  hierarchy. Different forms of knowledge can be
                  ascribed to processors, the most common being
                  view-based knowledge. View-based knowledge of
                  processor p of a fact f means that f is true in all
                  points that are indistinguishable by p. Normally,
                  view-based knowledge bases on the state (or state
                  history) of a node. Common knowledge is the
                  strongest form and the authors show that it is at
                  the core to a lot of important problems in
                  distributed systems (for example agreement). They use
                  the coordinated attack problem (or two way
                  handshake) to show, that common knowledge is not
                  attainable in systems with unreliable (or completely
                  asynchronous) message passing and without a global
                  clock. In general, such communication cannot be used
                  to attain common knowledge. This is a direct
                  connection to the impossibility of consensus in
                  asynchronous systems \cite{Fischer:1985:IDC}. In
                  practice, many problems can only be solved because
                  they do not require common knowledge. But also: There
                  are certain weaker kinds of common knowledge that
                  are attainable. The first is epsilon-common
                  knowledge, where the fact of sending a message (and
                  that all others receive it) will become common
                  knowledge within epsilon time steps. (This is
                  analogous to a synchronous broadcast.) The second is
                  eventual common knowledge, where sending a message
                  will eventually become common knowledge. (This
                  corresponds to asynchronous but reliable
                  communication.) Eventual common knowledge is weaker
                  than epsilon common knowledge. Things that can not be
                  attained using reliable communication cannot be
                  attained too if communication is
                  unreliable. Finally, the notion of timestamped
                  common knowledge is discussed (``at time t on p's
                  clock p knows something''). Timestamped common
                  knowledge is apparent in many protocols that operate
                  in rounds. At the end, the notion of virtual
                  synchrony is connected to the notion of knowledge
                  consistency, where nodes may actually not have
                  common knowledge, but nothing they see violates this
                  assumption. The conclusions contain hints to other
                  research in the field. Overall, this is a paper with
                  a huge potential that seemingly hasn't been followed
                  in recent years."
}

@Article{Jalote:1990:FRW,
  author  = {P. Jalote and S. K. Tripathi},
  title   = {Final Report on Workshop on Integrated Approach for
             Fault Tolerance - Current State and Future
             Requirements},
  journal = {ACM Operating Systems Review},
  year    = {1990},
  volume  = {24},
  number  = {1},
  pages   = {40--57},
  annote  = {[to read]}
}

@InProceedings{Jayanti:1990:WUR,
  title =        "Wakeup under Read/Write Atomicity",
  author =       "Prasad Jayanti and Sam Toueg",
  booktitle =    "Distributed Algorithms, 4th International Workshop",
  editor =       "Jan van Leeuwen and Nicola Santoro",
  address =      "Bari, Italy",
  month =        "24--26~" # sep,
  year =         "1990",
  series =       "Lecture Notes in Computer Science",
  volume =       "486",
  publisher =    pub-SV,
  ISBN =         "3-540-54099-7",
  pages =        "277--288",
  annote = "Ted says that here's a possible relation between 
    self-stabilization and unreliable failure detection. Have to get it."
}


@Book{Krumm:1990:FAK,
  author    = {Heiko Krumm},
  title     = {{Funktionale Analyse von Kommunikationsprotokollen}},
  series    = {Informatik-Fachberichte},
  number    = {247},
  publisher = pub-SV,
  year      = {1990},
  annote    = {Krumm entwirft ein allgemeines Modell zur Beschreibung
    funktionaler Aspekte von Kommunikationsprotokollen kooperierender
    Systeme und gibt einen verdienstvollen Ueberblick ueber die
    existierenden Spezifikationstechniken und deren Zusammenhaenge
    untereinander. Die Grundbegriffe des Modells sind System, Kopplung
    und Instanz. Ein System besteht aus einer Menge von Instanzen, die
    intern ueber eine Kopplung kommunizieren. Das System selbst hat im
    Falle eines offenen Systems eine Schnittstelle zu einer
    Systemumwelt, und da Instanzen selbst wieder Systeme im Kleinen
    sind, haben Instanzen auch eine Instanz-Schnittstelle. Diese
    statische Grundstruktur erlaubt eine hierarchische
    Systemdefinition. Systeme bzw. Instanzen koennen intern betrachtet
    werden (d.h. ihr innerer Aufbau inklusive Subsystemen und
    Kopplung) oder extern (d.h. nur anhand ihres Verhaltens an der
    Schnittstelle). Eine Schnittstelle ist eine Menge von
    Ereignissen. Das Kommunikationsverhalten an einer Schnittstelle
    ist ein Baum, dessen Kanten mit derartigen Ereignissen bezeichnet
    sind. Kommunikationsverhalten abstrahiert von internem Verhalten
    eines Systems. Durch Betrachtung des Kommunikationsverhaltens ist
    es moeglich, Instanzen bezueglich ihres Verhaltens zu
    vergleichen. Kommunikation wird modelliert durch eine Kopplung,
    die selbst ein System ohne interne Zustaende ist und atomar
    beliebig viele Ereignisse (=Nachrichten) an der Schnittstelle
    empfangen und versenden kann. Kopplungen arbeiten nach dem
    Uebereinkunftsprinzip (synchron) oder nach dem
    Uebertragungsprinzip (asynchron). Systeme selbst werden auf der
    Basis von Zustaenden und Zustandsuebergaengen definiert. Aus
    dieser Definition entspricht der Menge aller Systemablaeufe ein
    Erreichbarkeitsgraph, der eine endliche Repraesentation eines
    prinzipiell unendlichen Verhaltens ist. Das
    Schnittstellenverhalten (Kommunikationsverhalten) ist ein Baum,
    der mit dem internen Systemablauf vertraeglich ist. Anschliessend
    wird auf die Begriffe Dienst und Protokoll eingegangen. Ein
    Protokoll ist ein internes Ablaufverhalten eines Systems, welches
    vom globalen Verhalten abstrahiert und nur die vom Protokoll
    reglementierten Kommunikationsbeziehungen betrachtet. Ein Dienst
    ist eine Instanz, die an ihrer Schnittstelle ein gewisses
    Verhalten (mit gewissen Ereignissen)
    garantiert/anbietet. Protokolle sind darum horizontale
    Kommunikationsbeziehungen waehrend Dienste vertikale Beziehungen
    darstellen (bezogen auf die gebraeuchliche Darstellung des
    ISO/OSI-Protokollstacks). Im folgenden Kapitel werden die
    gaengigen Analysemassnahmen angesprochen (von informalen
    Ueberpruefungen bis formalen Korrektheitsnachweisen) und der
    Begriff der Eigenschaft eines Protokolls definiert. Anschliessend
    werden gaengige Spezifikationstechniken klassifiziert nach
    Spezifikationsform (konstruktiv = spezifiziere
    Schnittstellenverhalten durch internes Verhalten, deskriptiv =
    spezifiziere Schnittstellenverhalten durch direkte
    Verhaltensbeschreibung an der Schnittstelle). Konstruktive
    Techniken koennen direkt (konkreter Automat angegeben) oder
    algebraisch (eine gewisse Abstraktion von internem
    Automatenverhalten) sein. Deskriptive Techniken koennen logisch
    (aufbauend auf einem (temporal-)logischen Kalkuel) oder auf
    Zusicherungen aufbauen. Letztere koennen allerdings nur
    Sicherheitseigenschaften verifizieren. Beispiele fuer die
    einzelnen Spezifikationsformen werden gegeben (Petri Netze,
    Milners CCS, erweiterte endliche Automaten). Das Buch ist
    insgesamt sehr gut lesbar und auch fuer Einsteiger in das Gebiet
    durchaus geeignet, vor allem, weil es auf Deutsch ist.}
}


@InCollection{Lamport:1990:DCM,
  author    = {Leslie Lamport and Nancy Lynch},
  title     = {Distributed computing: models and methods},
  booktitle = {Handbook of Theoretical Computer Science
               (Volume B: Formal Models and Semantics)},
  editor    = {Jan van Leeuwen},
  publisher = {Elsevier},
  year      = 1990,
  chapter   = 18,
  pages     = {1157--1199}
}

@Book{Lee:1990:FTP,
  author    = {Peter A. Lee and Thomas Anderson},
  title     = {Fault Tolerance: Principles and Practice},
  edition   = {Second},
  series    = {Dependable computing and fault-tolerant systems},
  publisher = pub-SV,
  address   = {Berlin ; New York},
  year      = {1990},
  annote    = {[to read]}
}


@Article{Leveson:1990:USC,
  author   = {Nancy G. Leveson and Stephen S. Cha and John C. Knight
              and Timothy J. Shimeall},
  title    = {The Use of Self Checks and Voting in Software Error
              Detection: An Empirical Study},
  journal  = {IEEE Transactions on Software Engineering},
  volume   = {16},
  number   = {4},
  pages    = {432--443},
  year     = {1990},
  abstract = {This paper presents the results of an empirical study
              of software error detection using self checks and
              N-version voting. A total of 24 graduate students in
              computer science at the University of Virginia and the
              University of California, Irvine, were hired as
              programmers. Working independently, each first prepared
              a set of self checks to an existing implementation of
              that specification. The modified programs were executed
              to measure the error-detection performance of the
              checks and to compare this with error detection using
              simple voting among multiple versions. The goal of this
              study was to learn more about the effectiveness of such
              checks. The analysis of the checks revealed that there
              are great differences in the ability of individual
              programmers to design effective checks. We found that
              some checks which might have been effective failed to
              detect an error because they were badly placed, and
              there were numerous instances of checks signaling
              nonexistent errors. In general, specification-based
              checks alone were not as effective as combining them
              with code-based checks. Using self checks, faults were
              identified that had not been detected previously by
              voting 28 versions of the program over a million
              randomly-generated inputs. This appeared to result from
              the fact that the self checks could examine the
              internal state of the executing program whereas voting
              examines only the final results of computations. If
              internal states had to be consistent in N-version
              voting systems, then there would be no reason to write
              multiple versions. The programs were executed on 100
              000 new randomly-generated input cases in order to
              compare error detection by self checks and by 2-version
              and 3-version voting. Both self checks and voting
              techniques led to the identification of the same number
              of faults for this input, although the identified
              faults were not the same. Furthermore, whereas the self
              checks were always effective at detecting an error
              caused by a particular fault (if they ever did),
              N-version voting triples and pairs were only partially
              effective at detecting the failures caused by
              particular faults. Finally, checking the internal state
              with self checks also resulted in finding faults that
              did not cause failures for the particular input case
              executed. This has important implications for the use
              of back-to-back testing.},
  note     = {29 refs}
}



@Article{Mullender:1990:ADO,
  author   = {Sape J. Mullender and Guido {van Rossum} and Andrew S.
              Tanenbaum and Robbert {van Renesse} and Hans {van
              Staveren}},
  title    = {{Amoeba}: {A} Distributed Operating System for the
              1990s},
  journal  = j-IEEE-COMPUTER,
  volume   = {23},
  number   = {5},
  pages    = {44--53},
  month    = may,
  year     = {1990},
  abstract = {Amoeba is the distributed system developed at the Free
              University (VU) and the Centre for Mathematics and
              Computer Science (CWI), both in Amsterdam. Throughout
              the project's ten-year history, a major concern of the
              designers has been to combine the research themes of
              distributed systems, such as high availability, use of
              parallelism and scalability, with simplicity and high
              performance. Distributed systems are necessarily more
              complicated than centralized systems, so they have a
              tendency to be much slower. Amoeba was always designed
              to be used, so it was deemed essential to achieve
              extremely high performance. The Amoeba software is
              based on objects. An object is a piece of data on
              which well-defined operations may be performed by
              authorized users, independent of where the user and
              object are located. Objects are managed by server
              processes and named using capabilities chosen randomly
              from a sparse name space. Processes consist of a
              segmented address space shared by one or more threads
              of control. Processes can be created, managed, and
              debugged remotely. Operations on objects are
              implemented using remote procedure calls. Amoeba has a
              unique and fast file system. The file system is split
              into two parts --- the Bullet Service, which stores
              immutable files contiguously on the disk and the SOAP
              Directory Service, which provides a mechanism for
              giving capabilities symbolic names. The directory
              server also handles replication and atomicity,
              eliminating the need for a separate transaction
              management system.},
  annote   = {[to read]}
}

@Article{Neiger:1990:AIF,
  author  = {Gil Neiger and Sam Toueg},
  title   = {Automatically Increasing the Fault-Tolerance of
             Distributed Algorithms},
  journal = {Journal of Algorithms},
  volume  = {11},
  number  = {3},
  pages   = {374--419},
  year    = {1990},
  annote  = {Say you have designed a distributed algorithm in a
  synchronous (round based) system that tolerates crash failures using
  reliable communication. Can you mechanically derive a protocol which
  does the same thing and also tolerates send-omission, general
  omission or arbitrary failures? Yes you can, and Neiger and Toueg
  show you how to do it. The authors define a so-called translation,
  i.e. a function $T$ that converts a protocol $P_b$ to a protocol
  $P_s$. $P_b$ is correct when running in a system subject to a
  ``benign'' failure model $b$, and $P_s=T(P_b)$ is supposed to be
  correct in a system subject to a more severe failure model
  $s$. Correctness means that $P_s$ has the same set of histories as
  $P_b$ when you inspect only that part of the state which also exists
  in $P_b$. Also, only the states after a fixed number $c$ are
  inspected (i.e. they speak of a $c$-phase simulation). Formally, a
  translation from a system $S_b$ to a system $S_s$ is given by a
  history simulation function $H$ with the following properties: (a)
  $H$ maps histories of a protocol running in $S_s$ to histories in
  $S_b$ and these histories are valid. (b) $H$ preserves the
  correctness of processors, i.e. a processor correct in $S_s$ is also
  correct in $S_b$ (not necessarily vice versa), (c) the states from a
  history in $S_b$ appear in steps of $c$ in the history of
  $S_s$. This refers to the states of all processors (this must be
  weakened when investigating translations to the byzantine failure
  model; there they only refer to the state of the correct
  processors). A translated protocol solves some problem if its
  translated histories solve the original problem specification. The
  authors continue to present translations from crash to send-omission
  and then from crash to general-omission. The idea is to insert
  additional rounds of communication and let processors which do a
  general omission crash themselves. Because the number of faulty
  processors in both systems is $t$, such a translation is
  possible. When dealing with arbitrary failures, the properties of
  the translation function are weakened (see above). Translations are
  presented which use a validated reliable broadcast primitive to be
  able to detect byzantine behavior and pretend that the bad processes
  crashed. Some lower bounds are proved as well. Overall a
  well-readable paper despite the formalisms and the proofs. It is
  interesting how the original correctness specifications are
  transformed into systems with a more severe failure model: with
  crash the specification stays the same (since we are in a
  synchronous environment this is possible \cite{Gaertner:1999:ESD}),
  with Byzantine we restrict the correctness to the set of correct
  processes. Are there intermediate steps?}
}

@Article{Nelson:1990:FTC,
  author  = {Victor P. Nelson},
  title   = {Fault-tolerant computing: fundamental concepts},
  journal = j-IEEE-COMPUTER,
  volume  = {23},
  number  = {7},
  pages   = {19--25},
  month   = jul,
  year    = {1990},
  annote  = {The author first defines the usual terms (fault,
             error, failure, fault classes, availability,
             dependability, reliability) and then reviews common
             elements of strategies in fault tolerance with focus on
             hardware. The elements are fault masking, fault
             detection, fault containment, diagnosis,
             repair/reconfiguration, recovery. He elaborates on
             error detection/masking/correction (using codes),
             self-checking logic, module replication, timing
             checks, fault containment, reconfiguration, repair
             and recovery. He only handles masking fault tolerance
             (indicating that safety is more important than liveness
             \cite{Kreitz:1998:SWL}). An insightful paper where
             the ideas come from the obvious strive to organize
             the material more strictly. This is a task Nelson
             initiates, but has seemingly not aimed at.}
}

@Article{Ramanathan:1990:FCS,
  author   = {Parameswaran Ramanathan and Kang G. Shin and Ricky W.
              Butler},
  title    = {Fault-Tolerant Clock Synchronization in Distributed
              Systems},
  journal  = j-IEEE-COMPUTER,
  volume   = {23},
  number   = {10},
  pages    = {33--42},
  month    = oct,
  year     = {1990},
  abstract = {Software algorithms are suitable only where loose
              synchronization is acceptable, and hardware algorithms
              are expensive. A hybrid scheme achieves reasonably
              tight synchronization and is cost-effective.},
  keywords = {Computer Software--Applications; Computer Systems,
              Digital; Computers, Digital--Synchronization;
              Consistency algorithms; Convergence-averaging;
              Convergence-nonaveraging; Distributed; Distributed
              systems; Fault tolerant clock synchronisation;
              Fault-Tolerant Systems; Hardware synchronization
              algorithms; Hybrid synchronization; Probabilistic
              synchronization; Software synchronization algorithms;
              Synchronization Algorithms; Worst-case clock skews},
  annote   = {[to read]}
}

@Book{Raynal:1990:SCD,
  author    = {Michel Raynal and Jean-Michel H{\'e}lary},
  title     = {Synchronization and Control of Distributed Systems and
               Programs},
  series    = {Wiley Series in Parallel Computing},
  pages     = {124},
  publisher = {John Wiley \& Sons},
  address   = {New York},
  year      = {1990},
  keywords  = {book, text,},
  abstract  = {** Description ** The mastery of distributed
               applications demands a complete understanding of the
               foundations of the distributed algorithm. The object of
               this book is to present these foundations as they
               relate to synchronization--the key element of
               parallelism and distribution. Divided into four
               chapters, it explores the different types of
               synchronization that may be encountered in a parallel
               application and presents the concept of wave and
               several of its possible implementations. Synchronous
               and asynchronous systems and their relationships are
               described, as well as the concept of the
               synchronization phase, its properties, and its use.\par
               ** Contents ** Different Forms of Synchronization
               between Processes. The Concept of a Wave, and
               Synchronization by Wave Sequence. Synchronization by
               Logic Pulsing. Synchronization by Phases. Appendices.
               References. Index.\par ** Market ** Engineers,
               Researchers, Professors and Students of Engineering.},
  note      = {F-0-471-92453-9 1990cloth \$84.95}
}

@Article{Schneider:1990:IFS,
  author  = {Fred B. Schneider},
  title   = {Implementing fault-tolerant services using the state
             machine approach: {A} tutorial},
  journal = j-ACM-COMP-SURVEYS,
  volume  = 22,
  number  = 4,
  pages   = {299--319},
  month   = dec,
  year    = 1990,
  annote  = {The state machine approach enhances the
             fault-tolerance properties of a system by
             replicating nodes and coordinating the actions of
             these nodes (and the communication to and from them).
             The replica group thus acts as a single state
             machine, but now a certain number and kind of faults
             can be tolerated. This paper presents this approach
             and also discusses reconfiguration techniques. This
             is the paper to cite for the term ``state machine
             approach''.}
}

@Article{VanGasteren:1990:ANI,
  author  = {A. J. M. van Gasteren and Gerard Tel},
  title   = {Comments on ``on the proof of a distributed
             algorithm'': always true is not invariant},
  journal = ipl,
  volume  = 35,
  pages   = {277--279},
  month   = sep,
  year    = 1990,
  annote  = {a paper which explains the intrinsic difference
             between the notions of ``invariant'' and
             ``always-true''. A predicate $P$ is an invariant (1)
             if $P$ holds in every initial state of a system, and
             (2) $P$ is not falsified by any action of the
             system. A predicate $P$ is always true if $P$ holds
             in every reachable state of the system. This means, an
             invariant is always true, but the converse is
             not valid.  Example: a program which has one
             variable $k$ (initially 0) and one action ``if $k=1$
             then $k:=2$''. Consider the predicate $P\equiv
             k<2$. Then $P$ is always true for the program, but
             $P$ is not an invariant, because the action does not
             maintain $P$. The authors argue that invariance is
             more useful because it is maintained by program
             composition.}
}



@Article{Abadi:1991:ERM,
  author  = "Mart{\'\i}n Abadi and Leslie Lamport",
  title   = "The Existence of Refinement Mappings",
  journal = "Theoretical Computer Science",
  volume  = "82",
  number  = "2",
  pages   = "253--284",
  month   = may,
  year    = "1991",
  url     = {http://www.research.digital.com/SRC/personal/Martin_Abadi/Papers/tcs.ps},
  annote  = "Programs and specifications are viewed as formulas of the
    same logic (originally an idea of \cite{Pnueli:1981:TSC} explained
    in \cite{Abadi:1995:CS,Abadi:1993:CS,Lamport:1989:SAS}). The
    semantics of such a formula is the set of executions $\phi$ produced
    by that formula. A program $p_1$ implements $p_2$ if $\phi(p_1)$
    implies $\phi(p_2)$. That $p_1$ implements $p_2$ can be shown by
    exhibiting a refinement mapping which relates the actions of $p_1$
    to those of $p_2$. However, the validity of the implication does not
    guarantee that such a mapping exists. A refinement mapping between
    state spaces $S_1$ and $S_2$ can be used to prove that a state
    machine $\Sigma_1$ using states from $S_1$ implements a state
    machine $\Sigma_2$ using states from $S_2$. The main result of this
    paper is the following theorem: If $\Sigma_1$ implements $\Sigma_2$
    then one can add history and prophecy variables to the specification
    of $\Sigma_1$ to find a refinement mapping from $S_1$ to $S_2$. The
    assumptions to prove this theorem are: (1) $S_1$ is machine closed,
    i.e. the ``liveness'' property of $\Sigma_1$ does not imply
    additional safety properties. (2) $\Sigma_2$ has finite invisible
    nondeterminism, i.e. external steps of $\Sigma_2$ must be finitely
    representable internally, and (3) $\Sigma_2$ is internally
    continuous, i.e. a not-allowed behavior can be determined by looking
    at the externally visible behavior plus only a finite part of the
    internal behavior. Other proved propositions are: any safety
    property has a specification with finite invisible nondeterminism,
    any safety property is internally continuous, and any property has a
    machine closed specification. The result shows that it is always
    possible to prove safety using refinement mappings, if not
    liveness."
}

@Article{Arora:1991:MDC,
  author   = {Anish Arora and Shlomi Dolev and Mohamed G. Gouda},
  title    = {Maintaining digital clocks in step},
  journal  = {Parallel Processing Letters},
  volume   = {1},
  number   = {1},
  pages    = {11--18},
  month    = sep,
  year     = {1991},
  keywords = {clocks; N-clock; simultaneously triggered clocks;
              stabilisation; stability; system},
  annote   = {The authors present a design for achieving exact
              synchronization of bounded digital clocks in
              synchronous (i.e., lock-step) systems like digital
              circuits. The approach is an early example of
              applying the closure and convergence paradigm to
              problems, resulting in two self-stabilizing
              solutions: (1) a fall back solution, where a node
              periodically checks the clocks of its neighbours and
              falls back to a minimum value if values differ; and
              (2) a catch up solution where a maximum value is
              taken. The protocols are simple, uniform and
              distributed. The stabilization time is in the order
              of the degree of the nodes times the diameter of the
              network. Overall, this is a paper unmistaken in the
              clarity and enjoyment of exposition and style,
              gladly to be read.}
}

@InProceedings{Awerbuch:1991:SLC,
  author    = {Baruch Awerbuch and Boaz Patt-Shamir and George Varghese},
  title     = {Self-stabilization by local checking and
               correction},
  booktitle = {FOCS91 Proceedings of the 32nd Annual IEEE
               Symposium on Foundations of Computer Science},
  pages     = {268--277},
  year      = 1991,
  annote    = {[to write]}
}

@InProceedings{Chandra:1991:UFD,
  author    = {Tushar Deepak Chandra and Sam Toueg},
  title     = {Unreliable failure detectors for asynchronous systems},
  booktitle = pro-podc91,
  pages     = {325--340},
  year      = {1991},
  annote    = {The authors introduce the concept of a failure
               detector to battle the impossibility of consensus in
               asynchronous systems. Failure detectors allow to make
               guesses on which computers are still alive and which
               are not in the network. They are classified and
               applied to consensus and atomic broadcast. See also
               the journal version of this paper
               \cite{Chandra:1996:UFD}. First reference to the concept.}
}

@Article{Chen:1991:SAC,
  author  = {Nian-Shing Chen and Hwey-Pyng Yu and Shing-Tsaan Huang},
  title   = {A self-stabilizing algorithm for constructing
             spanning trees},
  journal = j-IPL,
  volume  = 39,
  pages   = {147--151},
  year    = 1991
}




@Article{Cooper:1991:CDG,
  author  = {Robert Cooper and Keith Marzullo},
  title   = {Consistent detection of global predicates},
  journal = j-SIGPLAN,
  volume  = {26},
  number  = {12},
  pages   = {167--174},
  month   = dec,
  year    = {1991},
  annote  = {Citable definition of possibly(P) and definitely(P).}
}

@Article{Cristian:1991:UFD,
  author  = "Flaviu Cristian",
  title   = "Understanding fault-tolerant distributed systems",
  journal = j-CACM,
  volume  = 34,
  number  = 2,
  pages   = {56--78},
  month   = feb,
  year    = 1991,
  annote  = {Describes traditional approach to fault-tolerant
             computing: failure models, failure semantics,
             fault-tolerance, architectural issues, standard
             systems, masking failures, hardware and software
             fault-tolerance.}
}

@Book{Echtle:1990:F,
  author    = "Klaus Echtle",
  title     = "Fehlertoleranzverfahren",
  publisher = pub-SV,
  year      = 1990,
  annote    = {Echtle's well-known book on fault tolerance strategies.
               Ist in der Bib. Inf. vorhanden}
}

@InProceedings{Arora:1991:MDS,
  author    = {Anish Arora and Shlomi Dolev and Mohamed G. Gouda},
  title     = {Maintaining Digital Clocks In Step},
  booktitle = {Distributed Algorithms, 5th International Workshop},
  editor    = {Sam Toueg and Paul G. Spirakis and Lefteris M.
               Kirousis},
  series    = ser-LNCS,
  volume    = {579},
  pages     = {71--79},
  publisher = pub-SV,
  address   = {Delphi, Greece},
  month     = {7--9~} # oct,
  year      = {1991},
  isbn      = {3-540-55236-7},
  annote    = {[to get]}
}


@InProceedings{Flatebo:1991:SLB,
  author    = {Mitchell Flatebo and Ajoy Kumar Datta},
  title     = {Self-stabilizing load balancing for an arbitrary network},
  booktitle = {ICYCS-93: Young Computer Scientists. Proceedings of
               the Third International Conference},
  editor    = {J. Wu and W. Gao and J. Yang and Y. Li},
  pages     = {743--746},
  publisher = {Tsinghua University Press},
  address   = {Beijing, China},
  month     = jul,
  year      = {1993},
  annote    = {[who can get a hand on this?]}
}

@Article{Gouda:1991:AP,
  author  = {Mohamed G. Gouda and Ted Herman},
  title   = {Adaptive programming},
  journal = j-IEEE-TRANS-SOFTW-ENG,
  volume  = 17,
  number  = 9,
  pages   = {911--921},
  month   = sep,
  year    = 1991,
  annote  = {Adaptive programs change their behaviour according
             to changes in their environment. Environment changes
             are assumed to be gradual and occur within
             relatively short periods of time compared to long
             periods of non-change. During periods of change an
             adaptive program behaves arbitrarily and eventually
             reaches a consistent behaviour if changes cease. The
             authors define adaptivity in terms of a `secures'
             relation: P secures Q in S means that if the
             environment establishes an input predicate P, then
             the program S will eventually reach a state where Q
             holds. A program is adaptive, if all properties of
             interest can be expressed using the secures
             relation. Thus, adaptivity is a general form of
             self-stabilization (which is ``true secures Q in
             S''). But in self-stabilization, legal states are
             usually defined in terms of internal
             variables. In adaptive programs there can be changes
             of the definition of legal states imposed by the
             environment.}
}

@Article{Gouda:1991:SCP,
  author  = {Mohamed G. Gouda and Nicholas J. Multari},
  title   = {Stabilizing communication protocols},
  journal = j-IEEE-TRANS-COMP,
  volume  = 40,
  number  = 4,
  pages   = {448--458},
  month   = apr,
  year    = 1991,
  annote  = {convergence stair presented}
}



@Article{Herlihy:1991:SGD,
  author  = {Maurice P. Herlihy and Jeannette M. Wing},
  title   = {Specifying graceful degradation},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {2},
  number  = {1},
  pages   = {93--104},
  month   = jan,
  year    = {1991},
  annote  = {The authors show how the ideal specification of a program
             can be ``degraded'' in a structured way so that the
             behavior of the program is still ``close'' to the ideal
             specification if the environment (faults etc.) prohibits
             the ideal specification to be satisfied. Processes and
             the environment are modeled as finite state machines.
             State transitions of the processes are called operations,
             transitions of the environment are called events. The
             combined automaton produces executions (sequences of
             state/operation pairs). The ideal specification prescribes
             a certain set of executions assuming a certain state of
             the environment. The environment ensures some properties
             called constraints. Events cause these constraints to be
             violated, resulting in an `enlarged' behavior of the
             combined automaton. Depending on the set of constraints
             guaranteed by the environment, the combined automaton
             satisfies a weaker specification than the ideal
             specification. The constraints induce a lattice on
             the set of specifications of the automaton. This allows
             a designer to specify system behavior in the presence
             of violated constraints. (Cases where this can arise
             in practice are faults, timing violations or security
             breaches.) The method (called the lattice relaxation method)
             makes environmental assumptions explicit and enables
             you to specify unwanted but sometimes unavoidable
             cases of system performance. Let's see how specifications
             can be systematically parametrized to yield such a
             lattice. [This paragraph was written in a state of
             partial sleep deliria; do not infer the quality of
             the paper from this text. In fact, the paper is
             very deep and interesting.] A similar idea is mentioned
             in \cite{Schepers:1993:TFT}. Does not cite
             \cite{Weber:1989:FSF}.}
}

@PhdThesis{Herman:1991:ATD,
  author    = {Ted Herman},
  title     = {Adaptivity through distributed convergence},
  school    = {Department of Computer Science, University of Texas at
               Austin},
  year      = {1991},
  OPTannote = {nicht kopiert/ausgedruckt},
}

@InProceedings{Liskov:1991:PUS,
  author    = {Barbara Liskov},
  title     = {Practical Uses of Synchronized Clocks in Distributed
               Systems},
  booktitle = pro-podc91,
  editor    = {Luigi Logrippo},
  pages     = {1--10},
  publisher = pub-ACM,
  address   = {Montr{\'e}al, Qu{\'e}bec, Canada},
  month     = aug,
  year      = {1991},
  ISBN      = {0-89791-439-2},
  annote    = {Discusses several uses of synchronized clocks in distributed
    algorithms: SCMP protocol (a protocol that achieves at-most-once
    semantics of messages), tickets in Kerberos, and several forms of
    leases \cite{Gray:1989:LEF} for maintaining replica consistency. The
    starting point to remember is that assumptions about clock rates are
    always probabilistic and so assumptions about synchronization should
    only affect the performance not the correctness of a protocol. In
    general, time is used to achieve liveness, e.g. a server requests
    a replica to give up its lease; it waits either until the replica
    replies or its lease expires. The final paragraph contains some
    ideas on how to transform an algorithm not relying on synchronized
    clocks into more efficient versions using synchronized clocks: (1)
    identify messages which could be avoided using timestamps, (2)
    if message exchange is already reduced, find ways to save storage
    using timestamps (e.g. purge storage after $t$ seconds).},
}

@PhdThesis{Liu:1991:FTP,
  author = {Zhiming Liu},
  title  = {Fault-tolerant programming by transformations},
  school = {University of Warwick, Department of Computer Science},
  year   = {1991},
  annote = {Published and extended in many forms
            \cite{Liu:1992:TPF,Liu:1993:SVR,Liu:1994:SDF,Liu:1995:FFF} and
            \cite{Liu:1996:VFR,Liu:1998:SVF} but seemingly the only
            reference to the term ``finite error behavior'' (p. 27).},
}

@InProceedings{Long:1991:SRI,
  author    = {Darrel D. E. Long and John L. Carroll and C. J. Park},
  title     = {A study of the reliability of {Internet} sites},
  booktitle = pro-srds91,
  pages     = {177--186},
  month     = sep,
  year      = {1991},
  annote    = {to read},
}

@Book{Manna:1991:TLR,
  author    = {Zohar Manna and Amir Pnueli},
  title     = {The temporal logic of reactive and concurrent systems:
               Specification},
  publisher = pub-SV,
  year      = {1991},
  annote    = {See also \cite{Manna:1995:TVR}.},
}


@InProceedings{Marzullo:1991:DGS,
  author    = {Keith Marzullo and Gil Neiger},
  title     = {Detection of global state predicates},
  booktitle = pro-wdag91,
  pages     = {254--272},
  year      = {1991},
}

@TechReport{Marzullo:1991:TCD,
  author      = {Keith Marzullo and Mark D. Wood},
  title       = {Tools for Constructing Distributed Reactive Systems},
  institution = {Dept.\ of Computer Science, Cornell University},
  number      = {TR 91-1193},
  address     = {Ithaca, New York ({USA})},
  month       = feb,
  year        = {1991},
  annote      = {mentions sensor/actuator approach [to read]},
}

@Article{Oezveren:1991:SSD,
  author        = {C{\"u}neyt M. {\"O}zveren and Alan S. Willsky and
                   Panos J. Antsaklis},
  title         = {Stability and Stabilizability of Discrete Event
                   Dynamic Systems},
  journal       = j-ACM,
  volume        = {38},
  number        = {3},
  pages         = {730--752},
  month         = jul,
  year          = {1991},
  area          = {Theory of Computation},
  general-terms = {Algorithms, Design, Languages, Reliability, Theory},
  keywords      = {Reliability, self-stabilizing systems, stability,
                   stabilizability, state feedback},
  cr-categories = {F.2.2 [computations on discrete structures \and
                   sequencing and scheduling]; F.4.3 [algebraic language
                   theory \and classes defined by grammars or automata];
                   G.2.2 [graph algorithms]; G.4 [algorithm analysis \and
                   reliability and robustness]; H.2.8; J.7 [command and
                   control \and process control]},
  annote        = {[to read]},
}

@Article{Peleska:1991:DVF,
  author    = {Jan Peleska},
  title     = {Design and Verification of Fault Tolerant Systems with
               {CSP}},
  journal   = j-DC,
  volume    = {5},
  number    = {2},
  pages     = {95--106},
  year      = {1991},
  publisher = pub-SV,
  annote    = {A case study in proving a hot standby system correct using
  CSP. The proof method is like in CSP and proves refinements down
  several levels to the implementation. At some lower level, crash
  faults are introduced and masked by a redundant component together
  with a reconfiguration procedure. It seems as if faults and fault
  actions are modeled explicitly. Conversely to \cite{Peled:1994:CFF},
  refinement steps are constructed by hand instead of using
  correctness preserving transformations (this is advocated as
  ``invent and verify'' which is claimed to suit industry). You need
  to know good CSP to really understand the text. If only parts of the
  system properties may be proved, this is noted to be something like
  graceful degradation.},
}



@Article{Ralston:1991:FMH,
  author  = {T. J. Ralston and S. L. Gerhart},
  title   = {Formal methods: {History}, practice, trends and prognosis},
  journal = {American Programmer},
  month   = may,
  year    = {1991},
  annote  = {[to get], cited in \cite{Glass:1999:RST} as the only study
             which has produced hard numbers on the benefit of
             applying formal methods in software engineering.},
}

@InProceedings{Sanders:1991:PTA,
  author    = {Beverly Sanders},
  title     = {A Predicate Transformer Approach to Knowledge and
               Knowledge-based Protocols},
  booktitle = pro-podc91,
  editor    = {Luigi Logrippo},
  pages     = {217--230},
  publisher = {ACM Press},
  address   = {Montr{\'e}al, Qu{\'e}bec, Canada},
  month     = aug,
  year      = {1991},
  ISBN      = {0-89791-439-2},
  annote    = {[to read]},
}


@Article{Swade:1991:CCB,
  author   = {D. Swade},
  title    = {The construction of {Charles Babbage's} difference
              engine},
  journal  = {Annals of the History of Computing},
  volume   = {13},
  number   = {1},
  pages    = {82--83},
  year     = {1991},
  keywords = {Babbage, difference engine},
  abstract = {Science Museum UK is building Babbage's difference
              engine (not his analytic engine which \~{} computer) to
              celebrate 200-th anniversary of Babbage's birth (1791).
              4000 parts, 3 tons, 10x6x1.5 feet Being built in
              materials and with accuracy of Babbage's day. The D.E.
              calculates 7th order polynomials to 30 decimal
              places.},
  annote   = {Bowen \cite{Bowen:1993:SCS} cites another text by
              Swade towards the concerns of Charles Babbage about
              the `table crisis' which led to the development
              of the difference engine.},
}



@Book{Tel:1991:TDA,
  author    = {Gerard Tel},
  title     = {Topics in Distributed Algorithms},
  publisher = {Cambridge University Press},
  series    = {Cambridge International Series on Parallel Computation},
  number    = {1},
  year      = {1991},
}

@PhdThesis{Arora:1992:FFC,
  author = {Anish Kumar Arora},
  title  = {A foundation of fault-tolerant computing},
  school = {The University of Texas at Austin},
  month  = dec,
  year   = {1992},
  annote = {Arora's thesis defines fault tolerance as the result
            of two conditions: closure and convergence. Closure
            means that a system remains in a set of legal states
            during normal system behaviour, convergence assures
            that any fault (modelled as actions on an extended
            state space \cite{Cristian:1985:RAF}) is eventually
            tolerated by returning into the set of legal states
            in finite time. This is a stabilizing notion of
            fault tolerance, published in an IEEE conference
            proceedings \cite{Arora:1993:CCF} and subsequently
            enhanced into a theory of correctors and detectors,
            a general theory of fault tolerance
            \cite{Arora:1998:CDM}.},
}


@Article{Beauquier:1992:TDP,
  author  = {Joffroy Beauquier},
  title   = {Two distributed problems involving {Byzantine}
             processes},
  journal = {Theoretical Computer Science},
  volume  = {95},
  number  = {1},
  pages   = {169--185},
  day     = {23},
  month   = mar,
  year    = {1992},
  annote  = {The author investigates two problems where processes
             are subject to Byzantine behavior: the naming
             problem (i.e., assigning unique names to processes)
             and the mutual exclusion problem. For these problems
             to be solvable, the author makes the following
             minimal assumptions: if k is a strict upper bound on
             the number of Byzantine processes, then the network
             must be k-connected, meaning that there are at least
             (k+1) disjoint paths between any two nodes. Also,
             communication must be synchronous and the algorithm
             must be non-uniform, i.e., there exists an
             exceptional node (the initiator) which is
             non-Byzantine. Access to public key cryptosystems is
             assumed. For mutual exclusion, the Byzantine
             processes may not hold the token arbitrarily long,
             i.e., their behavior is correct once they are in
             their critical section and during the exit sequence
             from it. This leads to the definition of locally
             Byzantine processes. Difficulties arise, because
             Byzantine nodes may not generally be detected. They
             may act normally when communicating to the outside
             world forever. Naming is achieved through a kind of
             echo algorithm that achieves safety through
             backwards confirmation over a given path. Through
             the k connectivity and communication synchrony it is
             assured that valid information eventually reaches a
             correct node and that this node can check the
             information. The mutual exclusion algorithm is based on
             Byzantine agreement.},
}



@InProceedings{Chandra:1992:WFD,
  author    = {Tushar Deepak Chandra and Vassos Hadzilacos and Sam
               Toueg},
  title     = {The Weakest Failure Detector for Solving Consensus},
  booktitle = pro-podc92,
  editor    = {Maurice Herlihy},
  pages     = {147--158},
  publisher = {ACM Press},
  address   = {Vancouver, BC, Canada},
  month     = aug,
  year      = {1992},
  annote    = {Continuing work \cite{Chandra:1991:UFD} shows that a
               certain form of failure detector is the weakest one
               necessary to solve consensus. See also journal
               version of this paper \cite{Chandra:1996:WFD} and
               other papers on this subject
               \cite{Chandra:1996:UFD,Chandra:1991:UFD}.},
}




@Article{Hariri:1992:ASD,
  author  = {Salim Hariri and Alok Choudhary and Behcet Sarikaya},
  title   = {Architectural Support for Designing Fault-Tolerant
             Open Distributed Systems},
  journal = j-IEEE-COMPUTER,
  volume  = {25},
  number  = {6},
  pages   = {50--62},
  month   = jun,
  year    = {1992},
  annote  = {[to read]},
}



@TechReport{Heimerdinger:1992:CFS,
  author      = {Walter L. Heimerdinger and Chuck B. Weinstock},
  title       = {A conceptual framework for system fault tolerance},
  institution = {Software Engineering Institute},
  number      = {CMU/SEI-92-TR-33},
  address     = {Carnegie Mellon University, Pittsburgh, PA},
  month       = oct,
  year        = {1992},
  annote      = {A good introductory text to the traditional concepts and
  issues in fault-tolerant computing (in the lines of
  \cite{Laprie:1992:DBC}) targeted at engineers and
  practitioners. Defines a system, dependability specifications
  (repeats the $10^{-9}$ reliability rate of commercial aircraft,
  states the problems with implicit and explicit specifications
  commented on by David Powell in Madeira), failure modes, faults
  vs. failures (fault is the failure of a subcomponent, avoids the
  term error), defines failure regions as opposed to fault regions
  (vertical vs. horizontal perspective).  Enumerates fault tolerance
  mechanisms (mainly redundancy management) and gives informal
  definitions of time and space redundancy, which are said to be
  necessary, not sufficient. The conclusions contain a set of 5 rules
  for the practitioner how to start off building reliable
  systems. Everything is underlined with running examples from bridge
  building and computer systems.},
}

@Article{Huang:1992:SSA,
  author             = {Shing Tsaan Huang and Nian Shing Chen},
  title              = {A self-stabilizing algorithm for constructing
                        breadth-first trees},
  journal            = {Information Processing Letters},
  volume             = {41},
  number             = {2},
  pages              = {109--117},
  day                = {14},
  month              = feb,
  year               = {1992},
  coden              = {IFPLAT},
  ISSN               = {0020-0190},
  mrclass            = {68M15},
  mrnumber           = {93a:68017},
  bibdate            = {Wed Nov 11 12:16:26 MST 1998},
  acknowledgement    = ack-nhfb,
  affiliation        = {Natl Tsing Hua Univ},
  affiliationaddress = {HsinChu, Taiwan},
  classification     = {723; 921; C1160 (Combinatorial mathematics); C4240
                        (Programming and algorithm theory)},
  corpsource         = {Inst. of Comput. Sci., Nat. Tsing Hua Univ., HsinChu,
                        Taiwan},
  journalabr         = {Inf Process Lett},
  keywords           = {algorithm theory; bounded function; Breadth First
                        Trees; breadth-first trees; computation step; Computer
                        Programming --- Algorithms; Fault Tolerant Software;
                        Mathematical Techniques; rules; self-stabilizing
                        algorithm; Self-Stabilizing Algorithms; Trees; trees
                        (mathematics)},
  treatment          = {T Theoretical or Mathematical},
  annote             = {[to get] Difference to \cite{Chen:1991:SAC}?},
}

@Book{Isermann:1992:IDS,
  author    = {Rolf Isermann},
  title     = {{Identifikation dynamischer Systeme}},
  publisher = pub-SV,
  address   = {Berlin},
  year      = {1992},
  annote    = {[Angabe von Armin]},
}



@Book{Laprie:1992:DBC,
  editor     = {Jean-Claude Laprie},
  title      = {Dependability: Basic Concepts and Terminology},
  publisher  = pub-SV,
  series     = {Dependable Computing and Fault-Tolerant Systems},
  volume     = {5},
  year       = {1992},
  OPTaddress = pub-SV:adr,
  annote     = {A joint attempt to unify dependability terminology and
                present it in 5 languages! Great! Maybe based on
                \cite{Laprie:1985:DCF}.},
}

@Article{Liu:1992:TPF,
  author  = {Zhiming Liu and Mathai Joseph},
  title   = {Transformation of Programs for Fault-tolerance},
  journal = {Formal Aspects of Computing},
  volume  = {4},
  number  = {5},
  pages   = {442--469},
  year    = {1992},
  annote  = {``The task is then to develop programs which perform
  predictably in the presence of \emph{detected} system failures, and
  this requires the representation of such failures in the execution
  of a program.'' (p.443) This must be done at different levels of
  abstraction so it is good to use the same formalism for
  specifications and programs. The formalism used is close to TLA
  \cite{Lamport:1994:TLA} with its state based specification method
  \cite{Lamport:1989:SAS} and notion of refinement
  \cite{Abadi:1991:ERM}. Physical faults are modeled as actions that
  transform a good state into an error state which may lead to a
  violation of the specification. If such an action occurs, a boolean
  variable $f$ is set to true (modeling fault detection by underlying
  hardware). Faults cannot destruct program operations at the lowest
  level of abstraction. The fault affected version of a program $P$ is
  obtained by a transformation $F$ which is assumed to mimic fail-stop
  semantics \cite{Schlichting:1983:FSP}, i.e. once a fault action is
  executed, no further regular program actions occur. To make the
  fault affected version satisfy the original specification $S$, a
  fault-tolerant transformation $T$ must be applied such that
  $F(T(P))$ satisfies $S$. Usually, the transformed version will
  satisfy a weaker specification. $T$ is modeled as adding recovery
  actions which are only enabled when $f$ is true. It is assumed that
  recovery actions are not affected by faults. Overall $F(T(P))$ can
  then be shown to be equivalent to the parallel execution of program,
  fault and recovery actions. Fairness guarantees eventual
  recovery. At a step during the refinement process where there is
  sufficient information about the fault environment (such as the
  number of faulty processes/channels), then the recovery
  transformation can be devised. A specification language for action
  systems similar to \cite{Chandy:1988:PPD} and a notion of
  satisfiability between program and specification is
  devised. (Section 4:) The failure semantics of a program $P$
  regarding a fault set $F$ are the set of executions of $P$ augmented
  by possible executions of actions from $F$. As $F$ is fail-stop,
  this results in a sequence of good states followed by an empty or
  infinite sequence of bad states. A fault transformation is defined
  which changes every command construct to result in the failure
  semantics of the program. With this formal definition of the
  transformation it is actually proved that $F(P)$ is parallel
  execution of $P$ and fault program. Fault transformations are
  transferred to sets of processes. Section 5 defines consistency and
  recovery transformations, the latter in analogy to fault
  transformations. Section 6 defines fault refinement, proves some
  properties of it and recovery transformations, and also proves some
  useful rules when refining programs to make them fault tolerant. A
  protocol for reliable communication is used as an example for the
  method. Interestingly, a variable $b$ is used in the fault program
  which guarantees the finiteness of consecutive faults. Overall,
  safety and progress properties can now be proved. The discussion
  (Sect. 8) states that the highest level of fault environment is the
  transition of $f$ from false to true. The next level action system
  is then an action which assigns $f$ the value true. Subsequent
  refinement steps must introduce more information about the system
  and its faults as the levels on which the faults occur are
  reached. It is an open question whether for any program and any
  fault model the value of $f$ can be derived at some point during the
  refinement process?! Handling real-time is an open question and is
  handled in later papers \cite{Liu:1996:VFR}. The idea of modeling
  faults as actions is attributed to
  \cite{Cristian:1985:RAF}. Overall, the paper is concise and
  rigorously formal, so at first reading many of the ideas are not
  readily visible. I had to read it twice, and after second reading
  like this text very much.},
}

@Misc{Mills:1992:NTP,
  author       = {David L. Mills},
  title        = {Network Time Protocol (Version 3)},
  howpublished = {Internet Request for Comments RFC 1305},
  month        = mar,
  year         = {1992},
  OPTannote    = {to get},
}



@PhdThesis{Nordahl:1992:SDD,
  author    = {Jens Nordahl},
  title     = {Specification and Design of Dependable Communicating
               Systems},
  school    = {Department of Computer Science, Technical University of
               Denmark},
  year      = {1992},
  OPTannote = {[to get] coins the terms local and global fault assumptions
               discussed in \cite{Liu:1995:FFF}.},
}

@Article{Ostroff:1992:FMS,
  author  = {Jonathan S. Ostroff},
  title   = {Survey of Formal Methods for the Specification and
             Design of Real-Time Systems},
  journal = {Journal of Systems and Software},
  volume  = {18},
  number  = {2},
  pages   = {33--60},
  month   = apr,
  year    = {1992},
  annote  = {An extensive survey of real-time programming languages,
  visual modelling languages, and most notably logics and algebras for
  specifying and verifying real-time systems. Real-time programming
  languages mostly only have delay and timeout mechanisms but lack
  formal semantics. Petri Nets are graphical modeling languages. An
  overview explains the different time semantics (linear, branching)
  and real-time temporal logics. Contains 144 references.},
}

@InProceedings{Powell:1992:FMA,
  author    = {David Powell},
  title     = {Failure Mode Assumptions and Assumption Coverage},
  booktitle = {Proceedings of the 22nd Annual International Symposium
               on Fault-Tolerant Computing ({FTCS} '92)},
  editor    = {Dhiraj K. Pradhan},
  pages     = {386--395},
  publisher = {IEEE Computer Society Press},
  address   = {Boston, MA},
  month     = jul,
  year      = {1992},
  ISBN      = {0-8186-2875-8},
  annote    = {[to read]},
}

@InProceedings{Rushby:1992:FSV,
  author    = {John Rushby},
  title     = {Formal Specification and Verification of a
               Fault-Masking and Transient-Recovery Model for Digital
               Flight-Control Systems},
  booktitle = {Formal Techniques in Real-Time and Fault-Tolerant
               Systems, 2nd International Symposium},
  editor    = {J. Vytopil},
  series    = ser-LNCS,
  volume    = {571},
  pages     = {237--258},
  publisher = pub-SV,
  address   = {Nijmegen, The Netherlands},
  year      = {1992},
  annote    = {[to read] appears also under the same name in a book
               of the same name published by the same editor in 1993
               (Kluwer).},
}

@Book{Siewiorek:1992:RCS,
  author    = {Daniel Siewiorek and Robert Swarz},
  title     = {Reliable Computer Systems: Design and Evaluation},
  publisher = {Digital Press},
  year      = {1992},
  OPTannote = {[get it?]},
}

@InProceedings{Shankar:1992:MVG,
  author    = {Natarajan Shankar},
  title     = {Mechanical Verification of a Generalized Protocol for
               {Byzantine} Fault-Tolerant Clock Synchronization},
  booktitle = {Formal Techniques in Real-Time and Fault-Tolerant
               Systems},
  editor    = {J. Vytopil},
  series    = ser-LNCS,
  volume    = {571},
  pages     = {217--236},
  publisher = pub-SV,
  address   = {Nijmegen, The Netherlands},
  month     = jan,
  year      = {1992},
  annote    = {[to read]},
}

@Article{Turek:1992:MFC,
  author  = {John Turek and Dennis Shasha},
  title   = {The many faces of consensus in distributed systems},
  journal = j-IEEE-COMPUTER,
  volume  = {25},
  number  = {6},
  pages   = {8--17},
  month   = jun,
  year    = {1992},
  annote  = {A rewarding (because well written) paper on the
             different shades of (im)possibility of consensus in
             distributed systems. Starts with the parable of La
             Tryste, notes general settings in which consensus is
             (im)possible in message passing systems (synchrony
             of processors, message order, communication delay,
             transmission method), relates results to shared
             memory settings, sketches Fischer, Lynch and
             Paterson's result \cite{Fischer:1985:IDC}, proves
             impossibility of Byzantine agreement in message
             passing settings without signatures. Concludes:
             Global knowledge is much stronger than local
             knowledge.},
}

@Article{Zhao:1992:SAB,
  author  = {Yi Zhao and Farokh B. Bastani},
  title   = {A Self-Adjusting Algorithm for {Byzantine} Agreement},
  journal = j-DC,
  volume  = {5},
  number  = {4},
  pages   = {219--226},
  year    = {1992},
  annote  = {},
}

@Article{Abadi:1993:CS,
  author  = {Mart{\'\i}n Abadi and Leslie Lamport},
  title   = {Composing Specifications},
  journal = j-TOPLAS,
  volume  = {15},
  number  = {1},
  pages   = {73--132},
  month   = jan,
  year    = {1993},
  annote  = {A ground- and breathtaking paper on the difficulties
    arising when composing specifications of subsystems to get a
    specification of the composed system. It is a formal investigation
    of the exact formulation of the composition principle for
    concurrent systems. A system is here something that interacts with
    an environment over a well-defined boundary. A specification of a
    system here is a set of behaviors at the boundary where the
    environment and the system alternately take steps. Steps of the
    system can contain stuttering steps (i.e., steps where the state
    of the interface does not change) and the environment makes the
    first move. A specification can be expressed by $E\Rightarrow M$
    where $E$ is an assumption about the environment and $M$ is the
    property guaranteed by the system. This is the understanding of
    the transition-axiom approach \cite{Lamport:1989:SAS}. The
    composition principle states that the composition $S$ of systems
    $S_1,\ldots,S_n$ satisfies a specification $E\Rightarrow M$ if three
    conditions hold: (1) $S$ guarantees $M$ if every $S_i$ guarantees
    its own $M_i$. (2) If $E$ holds and every $S_i$ guarantees $M_i$
    then $E_i$ holds for every $S_i$. (3) Every $S_i$ guarantees $M_i$
    if $E_i$ holds.  There is an obvious circularity here because
    every component is part of the environment of the other. The main
    result states that the composition principle is valid if the
    environment assumptions are safety properties. The paper contains
    a lot of insightful discussions about related aspects of
    specifications and programming: state vs. action based formalisms
    are compared in section 1.1, the distinction between system and
    environment is treated in sections 1.2 and 1.3. Section 3 contains
    an elaborated discussion on realizability of specifications and
    Section 4 details on the form of a specification. It examines what
    makes up a complete or a partial program and what difficulties
    arise in composition. For example, progress properties are
    an inherent part of programs, but are often stated implicitly as an
    incrementation of the program counter or fairness
    assumptions. Formally, progress properties are defined using the
    term `machine realizable', meaning something like `it doesn't add
    additional safety properties'. A specification then is a formula
    $I\cap E_S\cap E_L\Rightarrow M_S\cap M_L$, where
    $E_S$ and $M_S$ are the safety properties of the environment and
    the system respectively, and $E_L$ and $M_L$ are their liveness
    properties; $I$ is an initial state predicate on the environment
    state. Theorem 1 shows that $E_L$ can be incorporated into the
    system's liveness property, resulting in a specification being
    $I\cap E_S\Rightarrow M_S\cap (E_L\Rightarrow
    M_L)$. This means that the prerequisites of the composition
    principle are achievable. Section 4.4 states also that everything
    can be moved to the right hand side of the implication. Then the
    specification does not only specify wanted behaviour but also
    allows arbitrary behavior if $E_S$ is not met. The authors argue
    that this is impractical.},
}

@Article{Afek:1993:SSU,
  author  = {Yehuda Afek and Geoffrey M. Brown},
  title   = {Self-stabilization over unreliable communication
             media},
  journal = j-DC,
  volume  = {7},
  number  = {1},
  pages   = {27--34},
  year    = {1993},
}


@Article{Arora:1993:CCF,
  author  = {Anish Arora and Mohamed Gouda},
  title   = {Closure and convergence: {A} foundation
             of fault-tolerant computing},
  journal = j-IEEE-TRANS-SOFTW-ENG,
  volume  = {19},
  number  = {11},
  pages   = {1015--1027},
  year    = {1993},
}




@Article{Arora:1993:CIS,
  author  = {Anish Arora and Paul Attie and Michael Evangelist
             and Mohamed Gouda},
  title   = {Convergence of iteration systems},
  journal = j-DC,
  volume  = {7},
  number  = {1},
  pages   = {43--53},
  year    = {1993},
}

@InProceedings{Anagnostou:1993:TTP,
  author    = {Efthymios Anagnostou and Vassos Hadzilacos},
  title     = {Tolerating transient and permanent failures},
  booktitle = pro-wdag93,
  pages     = {174--188},
  year      = {1993},
  annote    = {The authors investigate the classes of problems
               which are solvable in the presence of transient and
               permanent failures. They begin by stating that
               self-stabilization has been the domain of research
               on tolerating transient failures which manifest
               themselves as arbitrary memory corruptions. On the
               other hand, fault tolerance has focussed on
               permanent failures such as process crashes. While
               transient failures could affect all processes,
               permanent failures were restricted to a certain
               subset of processes (usually half or one third of
               all processes). The authors show that tolerating
               transient and permanent failures is impossible in
               asynchronous systems for all problems which are
               ``failure sensitive''. Failure sensitive problems
               are such that it is vitally important to know whether a
               process has crashed or not. Examples for failure
               sensitive problems are leader election, consensus
               and spanning tree construction. As an example for a
               solvable problem they give an algorithm for unique
               naming in ring networks. These results give insight
               into the fundamental distinction between transient
               and permanent failures: transient failures are
               detectable in asynchronous systems, permanent ones
               are not. But the impossibility results are not too
               devastating since election and consensus are
               unsolvable in asynchronous systems anyway
               \cite{Fischer:1985:IDC}.},
}



@InCollection{Babaoglu:1993:CGS,
  author    = {{\"O}zalp Babao{\u{g}}lu and Keith Marzullo},
  title     = {Consistent global states of distributed systems:
               {Fundamental} concepts and mechanisms},
  booktitle = {Distributed Systems},
  crossref  = {Mullender:1993:DS},
  publisher = pub-AW,
  year      = {1993},
  editor    = {Sape Mullender},
  chapter   = {4},
  edition   = {Second},
  pages     = {55--96},
  annote    = {A well written survey on the theory of consistent
               global states. It is well suited as an introductory
               text for lectures on causality, distributed
               computations, snapshots, observations and predicate
               detection. A more research oriented text is
               \cite{Schwarz:1994:DCR}.}
}


@Article{Barborak:1993:CPF,
  author  = {Michael Barborak and Anton Dahbura and Miroslaw Malek},
  title   = {The consensus problem in fault-tolerant computing},
  journal = j-ACM-COMP-SURVEYS,
  year    = {1993},
  volume  = {25},
  number  = {2},
  pages   = {171--220},
  month   = jun,
  annote  = {This paper surveys research on the consensus
             problem, comparing and unifying the two traditional
             approaches, which are (1) system diagnosis and (2)
             the Byzantine Generals Problem (BGP). Approach (1)
             tries to reach a consistent state by letting nodes
             diagnose each other and infer from the resulting set
             of results (together with additional assumptions)
             which nodes are faulty. These nodes can then be
             avoided or shut down. Approach (2) applies
             distributed algorithms that reach nontrivial
             consensus on a single value in spite of possibly
             malicious faulty nodes within the network. Faults are
             thus masked. The paper is a near-to-complete survey
             of research up to about 1993, rather technical but
             exact.}
}

@InProceedings{Berrou:1993:NSL,
  author    = {C. Berrou and A. Glavieux and P. Thitimajshima},
  title     = {Near {Shannon} Limit Error-Correcting Coding and
               Decoding: Turbo Codes},
  booktitle = {IEEE Int. Conf. on Communications (ICC-1993)},
  pages     = {1064--1070},
  year      = 1993,
  annote    = {the basic reference to the term `turbo codes'.}
}


@Article{Birman:1993:PGA,
  author      = {K. P. Birman},
  title       = {The Process Group Approach to Reliable Distributed
                 Computing},
  journal     = j-CACM,
  year        = {1993},
  volume      = {36},
  number      = {12},
  pages       = {36--53},
  OPTkeywords = {ISIS, process groups, replicated processes}
}



@Article{Bowen:1993:SCS,
  author  = {Jonathan Bowen and Victoria Stavridou},
  title   = {Safety-critical systems, formal methods and standards},
  journal = {IEE/BCS Software Engineering Journal},
  year    = {1993},
  volume  = {8},
  number  = {4},
  pages   = {189--209},
  month   = jul,
  annote  = {A well-written survey of the use of formal methods
             in industry for the design and implementation of
             safety critical systems as of 1992 (should be read
             in conjunction with \cite{Rushby:1994:CSP}). A source
             for lots of citations on the importance of dependability
             and ways to achieve it. I especially like the introduction
             ``Human lives have depended on mathematical calculations
             for centuries\ldots'' where Babbage is shown to be one
             of the first researchers in computer dependability.
             Gives examples in the fields of aviation, railway systems,
             nuclear power plants, medical systems, ammunition
             control and embedded microprocessors. Standards are
             rather UK centric. Great bibliography.}
}


@InProceedings{Diehl:1993:RAD,
  author    = {Claire Diehl and Claude Jard and Jean-Xavier Rampon},
  title     = {Reachability Analysis on Distributed Executions},
  booktitle = {Proceedings of the 4th International Joint Conference
               on Theory and Practice of Software Development
               {TAPSOFT}'93},
  editor    = {Marie-Claude Gaudel and Jean-Pierre Jouannaud},
  series    = ser-LNCS,
  volume    = {668},
  publisher = pub-SV,
  address   = {Orsay, France},
  pages     = {629--643},
  year      = {1993},
  month     = apr # "~13--17,",
  annote    = {[to read]}
}

@Article{Dolev:1993:SDS,
  author  = {Shlomi Dolev and Amos Israeli and Shlomo Moran},
  title   = {Self-stabilization of dynamic systems
             assuming only read/write atomicity},
  journal = j-DC,
  volume  = 7,
  number  = 1,
  year    = 1993,
  pages   = {3--16},
  annote  = {describes fair protocol combination, i.e.,
             composition of self-stabilizing protocols.}
}

@InProceedings{Dolev:1993:WCS,
  author    = {Shlomi Dolev and Jennifer L. Welch},
  title     = {Wait-Free Clock Synchronization (Extended Abstract)},
  booktitle = pro-podc93,
  address   = {Ithaca, New York, USA},
  pages     = {97--108},
  month     = aug,
  year      = {1993},
  annote    = {The problem solved is the following: build an
               algorithm that guarantees that for some fixed $k$ a
               processor P which has been working correctly for $k$
               time units (and as long as it continues to work
               correctly) satisfies: (1) P's clock ticks normally
               (i.e., it is not adjusted), and (2) P's clock agrees
               with the clocks of all other processes which have
               been working correctly for the last $k$ time
               periods. The algorithm should handle any form of
               transient failures as well as ``napping'' failures,
               i.e., processors stop operation for arbitrarily long
               times and then resume work without noticing that
               they have stopped. A protocol that achieves this
               goal in the presence of napping failures is called
               wait-free. The authors present four such algorithms
               for different system settings (non/assumption of
               global pulse, global/local read/write atomicity
               etc.). Two of these protocols are both wait-free and
               self-stabilizing. Clocks seem to be unbounded.}
}



@Book{Freyermuth:1993:WFB,
  author    = {B. Freyermuth},
  title     = {{Wissensbasierte Fehlerdiagnose am Beispiel eines
               Industrieroboters}},
  series    = {Fortschr.-Ber. VDI Reihe 8},
  number    = {315},
  publisher = {VDI-Verlag},
  address   = {{D\"usseldorf}},
  year      = {1993},
  note      = {Dissertation TH Darmstadt},
  annote    = {[Angabe von Armin]}
}

@InProceedings{Gopal:1993:USF,
  author    = {Ajei S. Gopal and Kenneth J. Perry},
  title     = {Unifying self-stabilization and fault-tolerance},
  booktitle = pro-podc93,
  publisher = {ACM Press},
  pages     = {195--206},
  year      = {1993},
  annote    = {The authors explore the possibility of building
               protocols that tolerate transient (``systemic'') as
               well as permanent (``process'') failures. They
               arrive at similar conclusions as Anagnostou and
               Hadzilacos \cite{Anagnostou:1993:TTP}: there are no
               protocols that can solve general problems in finite
               stabilization time because it is impossible to
               distinguish a crashed process from one that
               continually experiences send omission failures. Even
               more, the process which cannot send messages does
               not know whether it can communicate or not
               because of its inability to determine how it arrived
               at its present state. It is however possible to
               solve problems if ``solvability'' is restricted to
               the communicating (or functioning) subset of
               processes. These results count for synchronous
               (round based) protocols. The idea of problem solving
               in the presence of transient faults is: never
               terminate and regularly purge your computation
               history. In the paper, also asynchronous systems are
               examined: the authors present a self-stabilizing
               eventually strong failure detector based on an
               eventually weak failure detector. This failure
               detector can help solve consensus in transient fault
               environments. It uses unbounded counters and
               resembles very much the Heartbeat failure detector
               \cite{Aguilera:1997:HTF}.}
}

@Article{Gumm:1993:AGA,
  author  = {H. Peter Gumm},
  title   = {Another glance at the {Alpern-Schneider} characterization
             of safety and liveness in concurrent executions},
  journal = j-IPL,
  volume  = {47},
  number  = {6},
  pages   = {291--294},
  year    = {1993},
  url     = {http://citeseer.nj.nec.com/gumm93another.html},
  annote  = {Revisits the Alpern-Schneider result \cite{Alpern:1985:DL}
      on ``every property is the intersection of a safety and liveness
      property'' in a more abstract setting: The result is restated in the
      context of a meet-preserving map between two complete Boolean
      algebras.  The theorem is more general than Alpern-Schneider since
      it allows a new application in a simplified setting of UNITY
      style logics \cite{Chandy:1988:PPD}: safety properties are those
      where a set of transitions is forbidden. This is similar to the
      fusion-closedness assumption on specifications of
      \cite{Arora:1998:CDM}.}
}




@InCollection{Hadzilacos:1993:FTB,
  author    = {Vassos Hadzilacos and Sam Toueg},
  title     = {Fault-tolerant broadcasts and related problems},
  booktitle = {Distributed Systems},
  crossref  = {Mullender:1993:DS},
  editor    = {Sape Mullender},
  publisher = pub-AW,
  edition   = {Second},
  year      = {1993},
  chapter   = {5},
  pages     = {97--145}
}

@Article{Katz:1993:SEM,
  author  = {Shmuel Katz and Kenneth J. Perry},
  title   = {Self-stabilizing extensions for message-passing
             systems},
  journal = j-DC,
  volume  = 7,
  number  = 1,
  year    = 1993,
  pages   = {17--26},
  annote  = {[to write]}
}


@InCollection{Kopetz:1993:RTD,
  author    = {Hermann Kopetz and Paulo Ver{\'\i}ssimo},
  title     = {Real Time and Dependability Concepts},
  booktitle = {Distributed Systems},
  crossref  = {Mullender:1993:DS},
  editor    = {Sape Mullender},
  publisher = pub-AW,
  edition   = {Second},
  year      = {1993},
  chapter   = {16},
  pages     = {411--446},
  annote    = {gives informal overview over (among others) redundancy}
}

@InProceedings{Kurshan:1993:VM6,
  author    = {R. P. Kurshan and L. Lamport},
  title     = {Verification of a Multiplier: 64 Bits and Beyond},
  booktitle = {Proceedings of the 5th International Conference on
               Computer Aided Verification},
  editor    = {C. Courcoubetis},
  series    = ser-LNCS,
  volume    = {697},
  publisher = pub-SV,
  address   = {Elounda, Greece},
  pages     = {166--179},
  year      = {1993}
}

@InCollection{Liu:1993:SVR,
  author    = {Zhiming Liu and Mathai Joseph},
  title     = {Specification and verification of recovery in
               asynchronous communicating systems},
  booktitle = {Formal Techniques in Real-time and Fault-tolerant Systems},
  pages     = {137--165},
  publisher = {Kluwer},
  year      = {1993},
  editor    = {Jan Vytopil},
  chapter   = {6},
  annote = {A nice presentation of transformation based
  fault-tolerance verification similar to \cite{Peled:1994:CFF}. The
  paper first nicely presents the formal prerequisites (states and
  behaviors, specifications and programs, refinement, asynchronous
  communication). Faults are modeled as a set of fault actions and a
  fault transformation, fault-tolerant refinement are defined as in
  \cite{Liu:1996:VFR,Liu:1995:FFF} although I like the presentation
  here most. Detection is not covered here; an error variable flags
  the detection of a physical fault. Fault tolerance is achieved by
  another form of transformation exemplified for the class of
  checkpointing and backward-recovery programs. Consistent checkpoints
  and rollback operations to the most recent checkpoint are treated at
  length and some Theorems about the sufficiency of this method are
  proved (reminds me of the optimality proof of
  \cite{Singhal:1995:OPA}). Failure during recovery is discussed: if
  recovery is fault-tolerant or not subject to faults, then recovery
  can be assumed atomic. Failures within recovery can be handled by
  restarting recovery when they are detected. Failures during
  checkpointing are handlable if we assume that there is at least one
  (initial) checkpoint available to which rollback is possible. The
  conclusions state that backward recovery will result in the
  satisfaction of a degraded specification. In open systems the
  repeated communication with the environment must not be
  neglected. The method of fault modeling is attributed to
  \cite{Cristian:1985:RAF}. This paper is seen as a generalization of
  this work and that of \cite{Schlichting:1983:FSP}.}
}



@InProceedings{Li:1993:FTD,
  author    = {Pei-yu Li and Bruce McMillin},
  title     = {Fault-Tolerant Distributed Deadlock Detection/Resolution},
  booktitle = {Proceedings of the 17th Annual International Computer
               Software and Applications Conference (COMPSAC'93)},
  year      = 1993,
  month     = nov,
  pages     = {224--230},
  annote    = {Also University of Missouri, Rolla, Department of Computer
               Science Technical Report Number CSC 92-04.  This paper takes the
               fault diagnosis approach to failure detection (look at
               \cite{Barborak:1993:CPF} for an intro to system diagnosis). I'm
               not sure how this relates to the standard interpretation of
               failure detectors \cite{Chandra:1996:UFD}, but here eventually
               every correct process knows the identities of all failed
               processes. A deadlock detection algorithm is proposed using a
               priority based probe approach to find cycles in the wait-for graph
               of an application. It can only detect deadlocks if there is at
               most one process failure in a deadlock cycle (a result is cited
               why being better is not possible). I don't see where this
               restriction comes from. Overall a nice text giving a somewhat different
               view of detecting stable predicates. Does not cite \cite{Shah:1984:DSS}
               although that paper also does deadlock detection.}
}

@InProceedings{Lincoln:1993:FVA,
  author    = {Patrick Lincoln and John Rushby},
  title     = {Formal Verification of an Algorithm for Interactive
               Consistency under a Hybrid Fault Model},
  booktitle = {Computer-Aided Verification, CAV '93},
  editor    = {Costas Courcoubetis},
  series    = ser-LNCS,
  volume    = {697},
  publisher = pub-SV,
  address   = {Elounda, Greece},
  pages     = {292--304},
  month     = jun # "/" # jul,
  year      = {1993},
  annote    = {Good cite for the term `hybrid fault model'.}
}

@Book{Mullender:1993:DS,
  editor    = {Sape Mullender},
  title     = {Distributed Systems},
  edition   = {Second},
  publisher = pub-AW,
  year      = 1993,
  annote    = {An excellent collection of substantial papers not
               only on the theoretical foundations of distributed
               systems (although these chapters are especially
               rewarding).}
}


@Article{Neiger:1993:SSC,
  author  = {Gil Neiger and Sam Toueg},
  title   = {Simulating Synchronized Clocks and Common Knowledge in
             Distributed Systems},
  journal = {Journal of the ACM},
  volume  = {40},
  number  = {2},
  pages   = {334--367},
  month   = apr,
  year    = {1993},
  area    = {Distributed Computing},
  annote  = {[to read]}
}

@InProceedings{Nordahl:1993:DFD,
  author    = {Jens Nordahl},
  title     = {Design for dependability},
  booktitle = {Proceedings of the third IFIP International Working
               Conference on Dependable Computing for Critical
               Applications (DCCA-3)},
  pages     = {29--38},
  year      = {1993},
  editor    = {Carl E. Landwehr},
  publisher = pub-SV,
  annote = {Nordahl shows how to verify that a system consisting of
  subcomponents can be proved correct in the presence of component
  failures. Three concepts are basic: (1) design, (2) correctness of
  design and (3) failure mode. A system is a process (here, CSP
  \cite{Hoare:1984:CSP} is used as a formalism throughout; we use the
  terminology of Lamport \cite{Lamport:1989:SAS} in this
  annotation). A specification is a property. A process $P$ implements
  a specification $S$ if all executions of $P$ are contained in $S$. A
  distinction between processes and specifications is made but it is
  remarked that this is not a central requirement (so other formalisms
  such as \cite{Pnueli:1981:TSC} can be used). A system can consist of
  a collection of subsystems (or components). A design determines how
  the components interact. (1) A design is a tuple consisting of a
  function mapping $n$ systems to a (new) system and a set of $n$
  subsystem specifications. The function can be some composition
  operator (parallel, sequential etc. and compositions of these
  operators). A design contains enough information to reason about the
  corresponding hierarchical level. (2) A design is correct regarding
  a specification $S$ iff the combined system satisfies $S$ whenever
  the subcomponents satisfy their specification. (3) A failure mode is
  a specification describing the behavior of a system when it is
  faulty. Such a failure mode may be given by a component designer
  when making assumptions at design time or it may be derived by an
  engineer from observing faulty system behavior at runtime. For
  example, a failure mode for Byzantine behaviour is the predicate
  true. Components can have several failure modes, and for $n$
  components this is expressed as an $n$-tuple $(F_1,\ldots,F_n)$ of
  sets of failure modes. For one combination of failure modes
  $(S_1,\ldots,S_n)$ one can prove that a design involving these
  subcomponents is correct regarding some system specification
  $S$. This can be extended to cover all possible combinations of
  component failure modes (e.g. to show that the system satisfies $S$
  in any case). Two notions of fault-tolerance are defined: masking
  fault tolerance (called `fault-tolerance') and fail-softness. A
  system design is fault-tolerant if it is correct regarding an
  $n$-tuple of component failure modes and the original correctness
  specification $S$. Fail-softness is defined as fault tolerance where
  $S$ is replaced by some weaker specification (which one to choose is
  a pragmatic issue, says Nordahl). The proof of correctness of design
  and fault tolerance can now be performed in the same logical
  framework as before. An example (stand by spare system) is given and
  proved. The conclusions discuss the following aspects: (a) the
  faulty behavior of a component is not given as a ``delta'' of its
  original correctness specification and a description of faulty
  behavior, but rather as a ``finished'' specification (i.e. a failure
  mode). Another approach is to calculate the weakened specification
  from the original specification and a failure model (such methods
  are \cite{Liu:1992:TPF,Peleska:1991:DVF,Gaertner:1999:ESD}). The
  disadvantage of the calculational approach is the necessity of
  calculations and the restrictions imposed on specifying faulty
  behavior. (I think both are equivalent.) (b) compositionality is
  achieved by defining fault tolerance of a design as a function of a
  single combination of subcomponent failure modes. Global assumptions
  about what combinations may arise can be dealt with at a higher
  level. (c) calculating the likelihood of failure can be integrated
  into the method quite easily by associating probabilistic measures
  to combinations of failure modes.  Overall this is a very concise
  and well-written paper.}
}

@InProceedings{Ricciardi:1993:UPN,
  author    = {Aleta Ricciardi and Andr{\'e} Schiper and Kenneth Birman},
  title     = {Understanding partitions and the ``no partition''
               assumption},
  booktitle = {Proceedings of the 4th Workshop on Future Trends of
               Distributed Computing Systems (FTDCS-4)},
  year      = {1993},
  annote    = {[to read]}
}

@InProceedings{Schepers:1993:CPT,
  author    = {H. Schepers and R. Gerth},
  title     = {A Compositional Proof Theory for Fault Tolerant
               Real-Time Distributed Systems},
  booktitle = {Symposium on Reliable Distributed Systems ({SRDS}
               '93)},
  pages     = {34--43},
  month     = oct,
  year      = {1993},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, Ca., USA},
  ISBN      = {0-8186-4310-2},
  annote    = {[to get] Extends the work of \cite{Schepers:1994:TCP}
               to real time.}
}

@InProceedings{Schepers:1993:TFT,
  author    = {Henk Schepers},
  title     = {Tracing Fault Tolerance},
  booktitle = {Proceedings of the third IFIP International Working
               Conference on Dependable Computing for Critical
               Applications (DCCA-3)},
  pages     = {39--48},
  year      = {1993},
  editor    = {Carl E. Landwehr},
  publisher = pub-SV,
  annote = {Basis to this paper is a system of sequential processes
  communicating via synchronous unidirected channels much in the sense
  of CSP \cite{Hoare:1984:CSP}. The semantics of a process are the set
  of possible message sequences (called histories or behaviors) at its
  observable channels. The set of all histories is divided into
  normal, exceptional and catastrophic behaviors. Normal and
  exceptional ones are acceptable, and only these are covered by fault
  tolerance mechanisms. Catastrophic behaviors fall outside of the
  fault hypothesis. A fault hypothesis is a reflexive relation on
  histories defining how the fault changes the history (i.e. a
  relation on normal behaviors and exceptional behaviors). A set of
  behaviors with respect to a fault hypothesis is obtained by
  augmenting the original set of traces according to the fault
  hypothesis relation. Reflexivity ensures that only traces are added
  (none removed) from the original set of traces. To prove that a
  system tolerates some fault hypothesis one must show, that the
  composition of the original system running under some fault
  hypothesis and some tolerance mechanism satisfies the original
  correctness specification. The examples given are a communication
  channel which may lose or corrupt messages, and a ``stable
  disk''. Only safety properties are investigated. The conclusions
  contain a good survey of formal methods in fault tolerance up to
  1993: Cristian \cite{Cristian:1985:RAF} is cited as the first to
  separate normal specification from tolerance
  specification. Formalisms in which faults are treated explicitly are
  \cite{Weber:1989:FSF,Joseph:1987:PRF,Peleska:1991:DVF}. The final
  sentence is: ``We currently investigate modeling graceful
  degradation as switching to another, less ambitious, set of
  acceptable histories.'' For this, see \cite{Herlihy:1991:SGD}.
  An extended version appeared as \cite{Schepers:1994:TCP}.}
}

@InProceedings{Schiper:1993:VSC,
  author    = {Andr{\'e} Schiper and Aleta Ricciardi},
  title     = {Virtually-synchronous communication based on a weak
               failure suspector},
  booktitle = pro-ftcs93,
  pages     = {534--543},
  year      = {1993},
  annote    = {A (quite mind-blowing) paper with lots of notation
               and definitions on how to implement a
               group membership service with certain semantics in
               asynchronous environments. The information that a
               process has about the functional states of the other
               processes in the group is called its
               view. Membership services that allow only a single
               view to exist in the system are said to have linear
               semantics. Those which allow concurrent views have
               either weak-partial (views may overlap) or strong
               partial (views may not overlap) semantics. The paper
               shows that strong partial semantics are related to
               virtually synchronous communication (VSC), however, an
               intuitive definition of VSC is not readily
               given. The authors propose a three-component
               architecture for implementing VSC in asynchronous
               systems: a weak failure suspector forms the basis
               for a view and a multicast component, which interact
               on a higher level. The failure suspector has weak
               completeness and the accuracy is ensured by
               either forcefully crashing the suspected process or
               by ensuring that the suspected process equally
               suspects the suspecting process. Crashed processes
               can recover but are thereafter new processes with
               new process identities. The failure suspector used
               here does not seem to fit into the scheme of Chandra
               and Toueg \cite{Chandra:1996:UFD}.}
}

@Article{Schneider:1993:SS,
  author  = {Marco Schneider},
  title   = {Self-stabilization},
  journal = j-ACM-COMP-SURVEYS,
  year    = 1993,
  volume  = 25,
  number  = 1,
  pages   = {45--67},
  annote  = {Standard reference survey on self-stabilization,
             nearly always cited together with Dijkstra
             \cite{Dijkstra:1974:SSS}.}
}

@InCollection{Schneider:1993:WGM,
  author      = {Fred B. Schneider},
  title       = {What good are models and what models are good?},
  booktitle   = {Distributed Systems},
  OPTcrossref = {Mullender:1993:DS},
  editor      = {Sape Mullender},
  publisher   = pub-AW,
  edition     = {Second},
  year        = 1993,
  chapter     = {2},
  pages       = {17--26}
}


@PhdThesis{Varghese:1993:SLC,
  author    = {George Varghese},
  title     = {Self-stabilization by local checking and correction},
  school    = {MIT},
  year      = {1993},
  note      = {Published as Technical Report MIT/LCS/TR-583},
  OPTannote = {to write}
}


%% crossref is disabled (OPTcrossref): the parent entry Mullender:1993:DS
%% appears earlier in this file, and classic BibTeX requires a
%% cross-referenced entry to come after every entry that refers to it.
%% All book-level fields are given explicitly below instead.
@InCollection{Verissimo:1993:RTC,
  author      = {Paulo Ver{\'\i}ssimo},
  title       = {Real-time communication},
  booktitle   = {Distributed Systems},
  OPTcrossref = {Mullender:1993:DS},
  pages       = {447--490},
  publisher   = pub-AW,
  year        = {1993},
  editor      = {Sape Mullender},
  chapter     = {17},
  edition     = {Second},
  annote      = {among other things, defines steadiness and tightness}
}

@Article{Abadi:1994:OFR,
  author   = {Mart{\'\i}n Abadi and Leslie Lamport},
  title    = {An Old-Fashioned Recipe for Real Time},
  journal  = j-TOPLAS,
  volume   = {16},
  number   = {5},
  pages    = {1543--1571},
  month    = sep,
  year     = {1994},
  url      = {http://www.acm.org/pubs/toc/Abstracts/0164-0925/186058.html},
  abstract = {Traditional methods for specifying and reasoning about
              concurrent systems work for real-time systems. Using
              TLA (the temporal logic of actions), we illustrate how
              they work with the examples of a queue and of a
              mutual-exclusion protocol. In general, two problems
              must be addressed: avoiding the real-time programming
              version of Zeno's paradox, and coping with
              circularities when composing real-time
              assumption/guarantee specifications. Their solutions
              rest on properties of machine closure and
              realizability.},
  keywords = {theory; verification},
  subject  = {{\bf F.3.1}: Theory of Computation, LOGICS AND
              MEANINGS OF PROGRAMS, Specifying and Verifying and
              Reasoning about Programs, Specification techniques.
              {\bf D.2.4}: Software, SOFTWARE ENGINEERING, Program
              Verification, Correctness proofs.},
  annote   = {[to get]}
}

@Article{Afek:1994:RCU,
  author  = {Yehuda Afek and Hagit Attiya and Alan Fekete and
             Michael Fischer and Nancy Lynch and Yishay Mansour and
             Dai-Wei Wang and Lenore Zuck},
  title   = {Reliable Communication Over Unreliable Channels},
  journal = {Journal of the ACM},
  volume  = {41},
  number  = {6},
  pages   = {1267--1297},
  month   = nov,
  year    = {1994},
  annote  = {[to read]}
}

@InProceedings{Alur:1994:FF,
  author       = {Rajeev Alur and Thomas Henzinger},
  title        = {Finitary Fairness},
  booktitle    = {Proceedings, Ninth Annual {IEEE} Symposium on Logic in
                  Computer Science},
  pages        = {52--61},
  year         = {1994},
  month        = "4--7 " # jul,
  address      = {Paris, France},
  organization = {IEEE Computer Society Press},
  references   = {{STOC::AlurAT1994} {JACM::BrachaT1985}
                  {JACM::DworkLS1988} {JACM::FischerLP1985}
                  {JACM::PeaseSL1980}},
  annote = {Introduces the term finitary fairness: requires that for
  every run of the system there is an unknown bound $k$ such that no
  enabled transition is postponed more than $k$ consecutive
  times. Cited and discussed in \cite{Merritt:1998:FSO}.}
}

@InProceedings{Arora:1994:CSB,
  author    = {Anish Arora and Mohamed G. Gouda and George Varghese},
  title     = {Constraint satisfaction as a basis for
               designing nonmasking fault-tolerance},
  booktitle = pro-icdcs94,
  pages     = {424--431},
  year      = 1994,
  annote    = {Important paper on self-stabilization
               methodologies. Has many relations to Varghese's
               thesis \cite{Varghese:1993:SLC}. Published as a more
               citeable Journal version \cite{Arora:1996:CSB}.}
}


@article{Arora:1994:DR,
  author  = {Anish Arora and Mohamed G. Gouda},
  title   = {Distributed reset},
  journal = j-IEEE-TRANS-COMP,
  volume  = 43,
  number  = 9,
  pages   = {1026--1038},
  month   = sep,
  year    = 1994,
  annote  = {}
}

@InProceedings{Arora:1994:ERT,
  author =       "Anish Arora",
  title =        "Efficient Reconfiguration of Trees: {A} Case
                 Study in Methodical Design of Nonmasking
                 Fault-Tolerant Programs",
  booktitle =    "Proceedings of the 3rd International Symposium on 
                 Formal Techniques in Real-Time and Fault-Tolerant
                 Systems (FTRTFTS'94)",
  year =         "1994",
  editor =       "H. Langmaack and W.-P. de Roever and J. Vytopil",
  pages =        "110--127",
  organization = "Organized Jointly with
                 the Working Group Provably Correct Systems-ProCoS",
  volume =       "863",
  series =       ser-LNCS,
  publisher =    pub-SV,
  address =      "L{\"u}beck, Germany",
  month =        sep,
  annote = "An application of the method of constraint satisfaction
  \cite{Arora:1994:CSB} to the problem of maintaining a rooted
  spanning tree in a network of nodes that may failstop, recover and
  where links may go down temporarily. Contains a brief discussion on
  the benefits of nonmasking fault tolerance. Shows that the concept
  of stabilization can handle ``permanent'' faults as well."
}

@INPROCEEDINGS{Awerbuch:1994:SLC,
	AUTHOR = "Baruch Awerbuch and Boaz Patt-Shamir and George Varghese
	 and Shlomi Dolev",
	TITLE = "Self-stabilization by local checking and
	 global reset",
	BOOKTITLE = pro-wdag94,
	YEAR = 1994,
	PAGES = "326--339",
        annote = "to write"
	}

@Article{Chandrasekar:1994:ASA,
  author =       "Srinivasan Chandrasekar and Pradip K. Srimani",
  title =        "A self-stabilizing algorithm to synchronize digital
                 clocks in a distributed system",
  journal =      "Computers and Electrical Engineering",
  volume =       "20",
  number =       "6",
  year =         "1994",
  pages =        "439--444",
  annote =       "Focusses on maintaining ``hardware'' clocks in step.
    Takes the self-stabilization view (as done by 
    \cite{Gouda:1990:SU,Arora:1991:MDS}). This means that nodes access
    neighboring states by reading variables. Thus it abstracts from
    message passing and physical clock drift."
}

@InProceedings{Cristian:1994:AFT,
  author =       "Flaviu Cristian",
  title =        "Abstractions for Fault-Tolerance",
  pages =        "278--286",
  ISBN =         "0-444-81988-6",
  editor =       "Karen Duncan and Karl Krueger",
  booktitle =    "Proceedings of the {IFIP} 13th World Computer
                 Congress. Volume 3 : Linkage and Developing Countries",
  month =        aug,
  publisher =    "Elsevier Science Publishers",
  address =      "Amsterdam, The Netherlands",
  year =         "1994",
  annote =       "The author presents some fundamental concepts of
                  fault tolerance and uses them to discuss several
                  current paradigms of fault tolerant computing. Basic
                  concepts include notions of service, server, the
                  depends-upon relation, failure classification,
                  failure semantics, failure masking by hierarchical
                  masking or by group masking. The fault tolerant
                  services discussed are: duplicated processors with
                  matching to provide crash failure semantics, error
                  detection/correction codes in stable storage to
                  provide read omission failure semantics, restartable
                  servers, point-to-point communication services,
                  distributed storage services, restartable services,
                  replicated storage and servers. Overall a paper
                  along the masking fault tolerance perspective as in
                  \cite{Cristian:1991:UFD}. When redundancy is not
                  available anymore, ``users must have some manageable
                  form of system behaviour that they can handle
                  without too much pain.'' Interesting are the two
                  laws of fault tolerance: First law: ``The stronger a
                  specified failure semantics, the more expensive and
                  complex it is to build a server that implements
                  it.'' Second law: ``The weaker the failure semantics
                  of members and communication, the more complex and
                  expensive the group management mechanisms become.''
                  Are these laws useful?"
}




@article{Cristian:1994:CHW,
  author  = "Flaviu Cristian and Richard de Beijer and Shivakant Mishra",
  title   = "Comparing how well asynchronous atomic broadcast protocols
             perform",
  journal = "Distributed Systems Engineering Journal",
  volume  = "1",
  number  = "4",
  pages   = "177--201",
  year    = "1994",
  annote  = "[to read] Title of the TR: A performance comparison of
             asynchronous atomic broadcast protocols."
}

@InCollection{Cristian:1994:CSP,
  author =       "Flaviu Cristian and Houtan Aghili and Ray Strong",
  editor =       "Zhonghua Yang and T. Anthony Marsland",
  title =        "Clock Synchronization in the Presence of Omission and
                 Performance Failures, and Processor Joins",
  booktitle =    "Global States and Time in Distributed Systems",
  publisher =    "IEEE Computer Society Press",
  year =         "1994",
  annote =       "A revised version of \cite{Cristian:1986:CSP}. Gives 
     a simplified version of the protocol of \cite{Dolev:1995:DFC},
     possible by reducing the types of failures assumed to occur. Here,
     only omission and performance failures are taken into account
     that do not partition the network. The algorithm is based on the
     paradigm of message diffusion. It assumes a maximum message
     delivery delay and a bounded drift rate of hardware clocks.
     It is mentioned that the MTTF of modern quartz clocks exceeds
     15 to 25 years, military versions even of hundreds of years.
     Overall, a paper showing that a weaker failure model results in
     simpler protocols."
}

@INCOLLECTION{Flatebo:1994:SSD,
	AUTHOR = "Mitchell Flatebo and Ajoy Kumar Datta and Sukumar Ghosh",
	TITLE = "Self-stabilization in distributed systems",
	BOOKTITLE = "Readings in Distributed Computing 
	 Systems",
	EDITOR = "T. L. Casavant and M. Singhal",
	PUBLISHER = "IEEE Computer Society Press",
	YEAR = 1994,  
	CHAPTER = 2,
	PAGES = "100--114"
	}

@article{Garg:1994:DWU,
  author  = "V. K. Garg and Brian Waldecker",
  title   = "Detection of weak unstable predicates in distributed
             programs",
  journal = "IEEE Transactions on Parallel and Distributed Systems",
  volume  = "5",
  number  = "3",
  pages   = "299--307",
  year    = "1994",
  annote  = {Angaben aus \cite{Stoller:1997:DGP}.}
}

@article{Gouda:1994:SO,
  author    = {Mohamed G. Gouda},
  title     = {Stabilizing observers},
  journal   = {Information Processing Letters},
  year      = {1994},
  month     = oct,
  day       = {28},
  volume    = {52},
  number    = {2},
  pages     = {99--103},
  treatment = {P Practical; T Theoretical or Mathematical},
  keywords  = {array of temperatures; boolean value; Convergence of
               numerical methods; Distributed computer systems;
               distributed processing; Error analysis; Fault tolerant
               computer systems; Observability; performance
               evaluation; Programmed control systems; sensors;
               stability; stabilizing observers; Stabilizing
               observers; Stabilizing phase synchronization; System
               stability; Uni-directional token systems}
}

@TechReport{Hadzilacos:1994:MAF,
  title =        "A Modular Approach to Fault-Tolerant Broadcasts and
                 Related Problems",
  author =       "Vassos Hadzilacos and Sam Toueg",
  number =       "TR94-1425",
  year =         "1994",
  month =        may,
  institution =  "Cornell University, Computer Science Department",
  pages =        "83",
  annote =       "Looks like an extended paper version of the chapter in
                  Mullender's book on distributed systems
                  \cite{Mullender:1993:DS}. The contents: While
                  theoretical research in fault tolerant distributed
                  computing has focussed mainly on solving the
                  consensus problem, applied research has investigated
                  reliable broadcasts. The authors show that both
                  problems are closely related. They give several
                  precise semantics of fault models (Sect. 2.3, e.g.,
                  they model crash failure by introducing an
                  additional non-leavable crash state and
                  corresponding state transitions) and a good
                  definition of synchrony, asynchrony and partial
                  synchrony of models (Sect. 2.4). Timing failures are
                  also discussed (sec. 2.5). They develop a suite of
                  broadcast specifications and algorithms separately
                  and in an incremental way which is very
                  instructive. Types of broadcasts are: reliable
                  broadcast, timed reliable broadcast, uniform
                  reliable broadcast (which places restrictions on the
                  operation of faulty processes) and certain order
                  specifications (FIFO, causal, atomic). Finally, the
                  relation between consensus and atomic broadcast is
                  investigated: they show that atomic broadcast can be
                  transformed into a consensus algorithm, and that
                  reliable broadcast and consensus yield atomic
                  broadcast (all in the time-free model with
                  crashes). The paper also discusses terminating
                  variants of reliable broadcast (where processes
                  deliver messages consistently even if they weren't
                  sent, e.g., as in Byzantine Agreement
                  \cite{Lamport:1982:BGP}) and multicast
                  specifications. Contains a reference to a
                  ``forthcoming book'' on fundamentals of fault
                  tolerant distributed computing \cite{Hadzilacos:FFT}
                  which obviously has not been published yet. Overall
                  a very rewarding paper suited for introductory
                  courses on this topic."
}

@INPROCEEDINGS{Huang:1994:DEM,
        AUTHOR = "Shing-Tsaan Huang and Lih-Chyau Wuu and Ming-Shin Tsai",
        TITLE = "Distributed execution model for self-stabilizing
         systems",
        BOOKTITLE = pro-icdcs94,
        YEAR = 1994,
        PAGES = "432--439",
        annote = "The authors introduce four categories of distributed
                  system models (serial, synchronous, synchronized and
                  distributed) and present a technique that makes
                  verification of algorithms in the distributed model
                  much easier once they have been proven correct for
                  the serial model. [what's the idea behind this?]"
        }





@Book{Isermann:1994:UEF,
  ALTauthor = 	 {},
  editor = 	 {Rolf Isermann},
  title = 	 {{\"U}berwachung und {Fehlerdiagnose} --- {Moderne}
                  {Methoden} und ihre {Anwendungen} bei technischen
                  {Systemen}},
  publisher = 	 {VDI-Verlag},
  year = 	 {1994},
  OPTkey = 	 {},
  OPTvolume = 	 {},
  OPTnumber = 	 {},
  OPTseries = 	 {},
  address = 	 {D{\"u}sseldorf},
  OPTedition = 	 {},
  OPTmonth = 	 {},
  OPTnote = 	 {},
  annote = 	 {[Angabe von Armin]}
}

@book{Jalote:1994:FDS,
  author    = {Pankaj Jalote},
  title     = {Fault tolerance in distributed systems},
  publisher = pub-PH,
  address   = pub-PH:adr,
  year      = 1994,
  annote    = {Fine self-contained overview over the area of fault
               tolerance in distributed systems. However, does not
               mention self-stabilization with a single word.}
}



@Article{Kindler:1994:SLP,
  author = 	 {Ekkart Kindler},
  title = 	 {Safety and Liveness Properties: {A} Survey},
  journal = 	 {EATCS-Bulletin},
  year = 	 {1994},
  OPTkey = 	 {},
  OPTvolume = 	 {},
  number = 	 {53},
  OPTpages = 	 {},
  month = 	 jun,
  OPTnote = 	 {},
  annote = 	 {A brief (4 page) and very concise survey on the differences
                  and historical evolution of different notions of safety
                  and liveness.},
  url = 	 {http://www.informatik.hu-berlin.de/~kindler/PostScript/EATCS53.ps}
}

@article{Lamport:1994:HTW,
  author  = {Leslie Lamport},
  title   = {How to Write a Long Formula (Short Communication)},
  journal = {Formal Aspects of Computing},
  year    = {1994},
  volume  = {6},
  number  = {5},
  pages   = {580--584},
  url     = {http://www.research.digital.com/SRC/personal/Leslie_Lamport/proofs/src119.dvi.Z},
  annote  = {Lamport proposes a structured and hierarchical way to write
             long mathematical formulas. Nested parentheses are replaced by
             proper indentation, and formulas with infix operators are used in
             a prefix operator style if they are long. Also, the cases construct
             and the use of definitions is discussed. The only unsurety is how
             to write implications. This text previously appeared as DEC SRC
             Research Report number 119.}
}

@InProceedings{Lamport:1994:SVF,
  author =       "Leslie Lamport and Stephan Merz",
  title =        "Specifying and Verifying Fault-Tolerant
                 Systems",
  booktitle =    "Formal Techniques in Real-Time and Fault-Tolerant
                 Systems",
  year =         "1994",
  editor =       "H. Langmaack and W.-P. de Roever and J. Vytopil",
  pages =        "41--76",
  OPTorganization = "Third International Symposium Organized Jointly with
                 the Working Group Provably Correct Systems-ProCoS",
  volume =       "863",
  series =       ser-LNCS,
  publisher =    pub-SV,
  address =      "L{\"u}beck, Germany",
  month =        sep,
  annote = "An in-length exposition of a formal proof of the oral
  messages algorithm to the Byzantine Generals Problem
  \cite{Lamport:1982:BGP}. The problem is specified on three different
  levels of abstraction: (1) a general and high level description of
  the process' behaviors, given that they are loyal, (2) a mid-level
  description containing the algorithm description, and (3) a
  low-level description specifying how message exchange works. Proofs
  are given that each lower level specification implements the next
  higher level specification including the correctness theorem at the
  mid-level: if at most one traitor exists, then the high level
  specification is implemented by the mid level specification. It is
  interesting that the global fault assumption appears at the
  mid-level, which is conform with the fault-tolerant refinement idea
  of \cite{Peled:1994:CFF}. The discussion contains some concrete
  arguments to why TLA and hierarchically structured proofs can help
  engineers prove systems correct up to an acceptable level of
  trust. By introducing real-time, only safety properties need to be
  proved, making aspects of the original Byzantine failure model more
  explicit."
}

@article{Lamport:1994:TLA,
  author  = {Leslie Lamport},
  title   = {{The Temporal Logic of Actions}},
  journal = j-TOPLAS,
  year    = {1994},
  month   = may,
  volume  = {16},
  number  = {3},
  pages   = {872--923},
  annote  = {Main reference to the syntax, semantics and merits of
             TLA. A good and increasingly exact overview starting from small
             examples, introducing temporal operators, fairness, composition,
             refinement, proof methods and rules, reasons not to use types,
             hiding of variables and some very interesting comments on
             mechanical verification, TLA vs. conventional programming
             languages, and comparisons with related formalisms. For a shorter
             introduction read \cite{Lamport:1994:ITT}.}
}



@TechReport{Lamport:1994:ITT,
  author = 	 {Leslie Lamport},
  title = 	 {Introduction to {TLA}},
  institution =  {Digital Systems Research Center},
  year = 	 {1994},
  OPTkey = 	 {},
  type = 	 {Technical Note},
  number = 	 {1994-001},
  address = 	 {Palo Alto, CA},
  month = 	 dec,
  OPTnote = 	 {},
  annote = 	 {A short and instructive primer of TLA omitting all the 
                  nitty gritty details. Starting point if you want to 
                  specify programs in TLA fast. Standard reference is 
                  \cite{Lamport:1994:TLA}.}
}

@InProceedings{Line:1994:MCS,
  author =       "Jeffery C. Line and Sukumar Ghosh",
  title =        "A methodology for constructing a stabilizing
                 crash-tolerant application",
  booktitle =    pro-srds94,
  year =         "1994",
  pages =        "12--21",
  annote =       "[to read]"
}

@inproceedings{Line:1994:SAD,
  author    = {Jeffery C. Line and Sukumar Ghosh},
  title     = {Stabilizing Algorithms for Diagnosing Crash Failures},
  booktitle = pro-podc94,
  year      = {1994},
  month     = aug,
  pages     = {376},
  annote    = {A simple stabilizing ``I am alive'' protocol is
               presented for diagnosing a single crash failure in
               at least strongly connected networks. The protocol
               assumes channels with finite capacities and bounded
               propagation delays. See also \cite{Arora:1995:TBS}.}
}


@InCollection{Liu:1994:SDF,
  author = 	 {Zhiming Liu and Mathai Joseph},
  title = 	 {Stepwise Development of Fault-Tolerant Reactive
                 Systems},
  booktitle = 	 {Formal techniques in real-time and fault-tolerant systems},
  OPTcrossref =  {},
  OPTkey = 	 {},
  pages = 	 {529--546},
  publisher = pub-SV,
  year = 	 {1994},
  editor = 	 {H. Langmaack and W.-P. de Roever and J. Vytopil},
  volume = 	 {863},
  OPTnumber = 	 {},
  series = 	 ser-LNCS,
  OPTtype = 	 {},
  OPTchapter = 	 {},
  OPTaddress = 	 {},
  OPTedition = 	 {},
  OPTmonth = 	 {},
  OPTnote = 	 {},
  annote = 	 {[to read]}
}


@InProceedings{Lo:1994:UFD,
  title =        "Using Failure Detectors to Solve Consensus in
                 Asynchronous Shared-Memory Systems (Extended
                 Abstract)",
  author =       "Wai-Kau Lo and Vassos Hadzilacos",
  booktitle =    pro-wdag94,
  editor =       "Gerard Tel and Paul M. B. Vit{\'a}nyi",
  address =      "Terschelling, The Netherlands",
  month =        "29~" # sep # "--1~" # oct,
  year =         "1994",
  series =       ser-LNCS,
  volume =       "857",
  publisher =    pub-SV,
  ISBN =         "3-540-58449-8",
  pages =        "280--295",
  annote =       "[to read]"
}

@Book{Lynch:1994:AT,
  author = 	 {Nancy A. Lynch and Michael Merritt and William Weihl 
                  and Alan Fekete},
  title = 	 {Atomic Transactions},
  publisher = 	 {Morgan Kaufmann},
  year = 	 {1994},
  OPTkey = 	 {},
  OPTvolume = 	 {},
  OPTnumber = 	 {},
  OPTseries = 	 {},
  address = 	 {San Mateo, CA},
  OPTedition = 	 {},
  OPTmonth = 	 {},
  OPTnote = 	 {},
  annote = 	 {[to read] :-)}
}



@article{Peled:1994:CFF,
     author = {Doron Peled and Mathai Joseph},
      title = {A Compositional Framework for Fault-tolerance by Specification
               Transformation},
    journal = {Theoretical Computer Science},
     volume = {128},
       year = {1994},
      pages = {99--125},
    annote = "A fault-tolerant program is viewed as a fault-intolerant
    program enhanced by some fault-tolerance/recovery mechanism (like
    in \cite{Arora:1998:CDM,Arora:1998:DCT}). This can be viewed as a
    program transformation, i.e. a function $T$ that maps a
    fault-intolerant program $P$ to a fault-tolerant version
    $P'=T(P)$. However, introducing a recovery mechanism alters the
    original specification $S$ of $P$ to some augmented specification
    $S'$ which takes the behavior of the tolerance mechanism into
    account. So the effects of a tolerance mechanism on $S$ can be
    regarded as a specification transformation $F$ which maps $S$ to
    $S'=F(S)$. A program transformation $T$ and a specification
    transformation $F$ correspond, if for all programs $P$ and for all
    properties $p$, if $p$ holds for $P$ then $F(p)$ holds for $T(P)$.
    Now, for some corresponding transformations $T$ and $F$, if some
    property $p$ holds for $P$ and $F(p)$ implies property $q$, then
    $q$ holds for $T(P)$. So properties about $T(P)$ can be proved
    without looking at the code of $T(P)$ if $T$ and $F$ correspond.
    A specification transformation $F$ is said to be compositionally
    complete with respect to a program transformation $T$ if all
    properties can be proved in this way. Criteria for compositional
    completeness are given and depend on the monotonicity and the
    expressiveness of the specification language. --- The methodology
    is exemplified by the example of forward and backward recovery of
    distributed computations. A recovery algorithm is proposed and its
    corresponding specification transformation defined which is
    divided in a fixed part (e.g., eventually a snapshot will be taken
    (liveness) and the tolerance mechanism does not interfere with the
    original computation (safety)) and a part depending on the
    original specification. Then the basic program is transformed into
    a fault-tolerant program $P'$ and then to a fault-tolerant program
    in a faulty environment (the methods behind this is described in
    \cite{Liu:1992:TPF}). Then some simple properties of the
    fault-tolerant program are verified by applying the transformation
    $F$ to them and using the above proof rule. These properties are
    usually weaker since faults may deem the original properties
    unachievable (they however do not say how to derive them in
    general). The authors give criteria how to verify that $T$ and $F$
    actually correspond. They also discuss modularity issues:
    variables of the recovery algorithm can be omitted from the
    specification by a method of concealment. Fairness is an open
    problem since imposing a fault-tolerance mechanism and invoking it
    on faults can destroy fairness guarantees that held for the
    untransformed program. The paper uses interleaving semantics with
    a formalism coming from the area of (temporal) logic and
    concurrency. A fine paper."
}

@InProceedings{Ruget:1994:CMC,
  title =        "Cheaper Matrix Clocks",
  author =       "Fr{\'e}d{\'e}ric Ruget",
  booktitle =    pro-wdag94,
  editor =       "Gerard Tel and Paul M. B. Vit{\'a}nyi",
  address =      "Terschelling, The Netherlands",
  month =        "29~" # sep # "--1~" # oct,
  year =         "1994",
  series =       ser-LNCS,
  volume =       "857",
  publisher =    pub-SV,
  ISBN =         "3-540-58449-8",
  pages =        "355--369",
  annote =       "[to read]"
}

@article{Rushby:1994:CSP,
        AUTHOR = {John Rushby},
        TITLE = {Critical System Properties: Survey and Taxonomy},
        JOURNAL = {Reliability Engineering and System Safety},
        YEAR = 1994,
        VOLUME = 43,
        NUMBER = 2,
        PAGES = {189--219},
        annote = "Although quite long, this is a very insightful and
         rewarding survey of various notions of ``critical systems''
         from the broad literature. First, Rushby compares the four
         distinct approaches to critical systems that have emerged:
         (1) dependability/fault tolerance, (2) safety engineering,
         (3) secure systems, (4) real time systems. The dependability
         approach includes the usual notion of fault tolerance that a
         system should not deviate from its system specification if
         faults occur. The system specification can also be degraded
         resulting in a well-defined failure behavior (or failure
         semantics). The central method to achieve this is application
         of redundancy. Faults are categorized in fault models or
         failure semantics of subcomponents and there is a tradeoff
         between the fault types and the number of faults that can
         occur for a given level of redundancy. (``a quad-redundant
         Byzantine fault-tolerant system can withstand a single fault
         of any kind, whereas a differently organized quad-redundant
         system can withstand as many as three crash faults, but no
         other kind.'' [Hybrid models can help here.]) Managing
         redundancy requires coordination, which is difficult. Method
         to fight transient faults (self-stabilization) and design
         faults are also discussed. (2) The safety engineering
         approach is concerned with the occurrence of unplanned
         events. Safety means here that the system does no harm of any
         kind. Safety is achieved through hazard analysis (either
         reasoning backwards from a catastrophe or reasoning forward
         from a component failure). This can also be done for
         software, resulting in software fault tree analysis
         (SFTA). The advantage of this approach is that it explicitly
         considers the system context. A ``fail-safe'' operation is
         desired and achieved through a safe step-by-step operation
         based on a notion of locks (``lockin'', ``lockout''). While
         dependability ``tries to maximize the extent to which the
         system works well'', safety engineering ``tries to minimize
         the extent to which it can fail badly'' (p.13f). Thus
         dependability is natural in circumstances in which there is
         ``no safe alternative to normal service'' (like in aircraft
         control). (3) The secure systems approach holds up the
         protection of secrets and privacy. This includes a notion of
         integrity. Methods to achieve this are usually based on
         kernelization. This is analogous to fault containment in
         dependability. (4) The real-time systems approach needs to
         ensure deadlines and ``jitter'' (i.e. a certain quality of
         outputs). Real-time systems are organized as cyclic
         executives of a fixed number of processes in a fixed schedule
         (which has a number of disadvantages described on p. 20) or a
         preemptive and priority driven schemes that dominate today
         (especially a method called rate monotonic scheduling where
         priorities are derived from iteration rates). Both methods
         are compared on p. 23. There are relations especially between
         hard-real-time and masking fault tolerance. In Chapter 3
         Rushby surveys formal models for critical system properties
         and assurance methods. These include formalizations of
         security (via access control mechanisms), fault tolerance and
         real time. Formal notions of properties are usually based on
         traces (although security for example can be seen as a higher
         level property, see p. 29). Fault tolerance formalizations
         are either calculational (like \cite{Arora:1993:CCF}),
         i.e.~they calculate the effects of faults and see whether
         resulting executions are still ``safe'', or specificational,
         i.e.~the fault-tolerance specification is composed of the
         failure semantics of the subsystems (like
         \cite{Herlihy:1991:SGD}). More references to the literature
         are given on p.~30. Formalizations of real-time properties
         are usually based on some form of temporal logic and model
         checking (there are also versions of such logics that take
         time intervals into account, see p.~33f). Assurance
         techniques must take random and systematic failures into
         account to calculate some reliability measure (which for
         critical systems is in the order of $10^{-9}$ probability of
         failure during one hour operation). Direct measurement and
         testing is ruled out because of these high demands (testing
         would require some 100.000 years to meet these
         measures). Calculational approaches on the other hand contain
         many (``only'', p.~37) subjective factors such as the
         examination of the lifecycle process. Formal methods can be
         used to guarantee formal correctness but nobody can give real
         evidence for attaching some reliability number (this is a
         good quote, p.~38). Finally, Rushby provides a taxonomy of
         critical system properties based on interaction and coupling
         which laxly said is the necessity of flexibility versus the
         flexibility offered by the system. Overall this is one of my
         top ten favourite papers because it offers an understandable
         overview with well-chosen and well-explained examples,
         written in fine language and without the usual academic
         high-nose. The pages refer to the printed version from the
         web page http://www.csl.sri.com/reports/html/csl-93-1.html"
}

@techreport{Sabel:1994:SFS,
  author      = {Laura S. Sabel and Keith Marzullo},
  title       = {Simulating Fail-Stop in Asynchronous Distributed
                 Systems},
  institution = {Cornell University, Computer Science Department},
  number      = {TR94-1413},
  year        = {1994},
  month       = mar,
  pages       = {24},
  abstract    = {The fail-stop failure model appears frequently in the
                 distributed systems literature. However, in an
                 asynchronous distributed system, the fail-stop model
                 cannot be implemented. In particular, it is impossible
                 to reliably detect crash failures in an asynchronous
                 system. In this paper, we show that it is possible to
                 specify and implement a failure model that is
                 indistinguishable from the fail-stop model from the
                 point of view of any process within an asynchronous
                 system. We give necessary conditions for a failure
                 model to be indistinguishable from the fail-stop model,
                 and derive lower bounds on the amount of process
                 replication needed to implement such a failure model.
                 We present a simple one-round protocol for implementing
                 one such failure model, which we call simulated
                 fail-stop.},
  annote      = {Published as \cite{Sabel:1994:SFA} and at
                 PoDC94. Not readily available on the net. See summary of
                 \cite{Sabel:1994:SFA}.}
}

@InProceedings{Sabel:1994:SFA,
  author    = {Laura S. Sabel and Keith Marzullo},
  title     = {Simulating Fail-Stop in Asynchronous Distributed
               Systems},
  booktitle = pro-srds94,
  pages     = {138--147},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, Ca., USA},
  month     = oct,
  year      = {1994},
  annote    = {Abstract in \cite{Sabel:1994:SFS}. The authors
               present a method how to ``implement'' the fail-stop
               failure model in asynchronous environments. Because
               this task is impossible, they give a version of a
               failure model that is indistinguishable from
               fail-stop and call it simulated fail-stop. The
               system model is based on the asynchronous crash
               model with reliable FIFO channels. Processes have a
               local `crash' variable and a `failed' vector which
               should reflect the `crash' values of all other
               processes. They define the failed-before relation in
               terms of these variables: i failed before j in a run
               iff at j failed[i] is true and remains true in that
               run. The indistinguishability of runs is based on the
               definitions of \cite{Chandy:1986:HPL}. The fail-stop
               failure model is defined using two conditions: (FS1)
               A process's failure is eventually detected by all
               processes that don't crash. (FS2) There are no false
               detections. The authors derive three necessary
               conditions for indistinguishability of FS: (C1) If a
               process i detects the crash of a process j, then
               eventually j will crash. (C2) The failed-before
               relation is acyclic. (C3) A crash event happens
               before no other event. These are not sufficient
               conditions, as shown by a run that meets C1--C3 and
               is distinguishable from FS. However, the authors
               give another set of sufficient conditions which are
               not all necessary: weakening FS1 is not possible
               because this may prevent progress, so FS2 is
               weakened into four conditions: (FS2a) If a process i
               suspects the crash of process j, then eventually j
               will crash; this in conjunction with FS1
               implies C1. (FS2b) The failed-before relation is
               acyclic; this is C2. (FS2c) A process never detects
               its own failure. (FS2d) Once i detects the failure
               of j, then all messages sent by i to any process k will
               not be received until k has also detected the failure
               of j; c and d together imply C3. The authors give a
               simple protocol that implements these
               conditions. The central idea is to form an agreement
               on the suspicions by using intersecting quorum sets
               of processes. This mainly ensures C2. The size of
               such a quorum set must be strictly greater than
               $n(\frac{t-1}{t})$, where n is the number of
               processes and t is the maximum number of processes
               that may fail. The authors relate these results to
               the failure detector hierarchy of
               \cite{Chandra:1996:UFD}: the fail-stop model is
               equivalent to having a perfect failure detector
               (PFD), and the properties that are proposed are
               those of a strong failure detector (SFD). So while a
               PFD cannot be implemented by a SFD, an
               indistinguishable failure detector can be
               implemented. Here's a nice citation: ``A failure
               model describes the manner in which the components
               of a system can fail.'' (Sect. 3)}
}

@Article{Schepers:1994:TCP,
  author     = {Henk Schepers and Jozef Hooman},
  title      = {A trace-based compositional proof theory for fault
                tolerant distributed systems},
  journal    = {Theoretical Computer Science},
  volume     = {128},
  number     = {1--2},
  pages      = {127--157},
  day        = {6},
  month      = jun,
  year       = {1994},
  corpsource = {Dept. of Math. and Comput. Sci., Eindhoven Univ. of
                Technol., Netherlands},
  keywords   = {alternating bit protocol; compositional formalism;
                distributed processing; exceptional behaviour; failure
                hypothesis; fault tolerant computing; fault tolerant
                distributed systems; formal specification; formal
                verification; input behaviour; network completeness;
                output behaviour; reasoning; safety property
                specification; software reliability; soundness; theorem
                proving; trace-based compositional proof theory; triple
                modular redundant system},
  annote     = {The authors introduce a rigorous formalism allowing to
  prove safety properties of fault tolerant systems. This is done by
  extending a formalism used to reason about normal behavior (such as
  \cite{Hoare:1984:CSP}) with a single rule by which a component
  specification is weakened to reflect its faulty
  behavior. Prerequisite is a precise characterization of faulty
  behavior, which is done using a reflexive relation on normal and
  faulty traces. The method is specificational \cite{Rushby:1994:CSP}
  and at the system interface level describing a specification
  transformation. Examples (stuck at zero, message corruption, message
  loss) are given. Formally, a failure hypothesis is a reflexive
  relation on normal behavior, preserving prefix closure and affecting
  only the components of the failed process. A failure hypothesis can
  be used to derive the faulty behavior of a system. Examples which
  are proved safe are TMR and the alternating bit protocol. The proof
  system is shown to be sound and complete (didn't look at the
  proofs). As said above, only safety properties are
  handled. Compositional reasoning about liveness is difficult
  \cite{Abadi:1993:CS}. Future work states that it would be nice to
  have a logic to express failure hypotheses more
  elegantly. \cite{Schepers:1993:CPT} extends this work to also cover
  real time. Overall an interesting paper, probably the Journal
  version of \cite{Schepers:1993:TFT}, citing all the prominent
  players of the time
  \cite{Joseph:1987:PRF,Liu:1993:SVR,Nordahl:1993:DFD,Peleska:1991:DVF,Weber:1989:FSF}
  and the Conference version of \cite{Peled:1994:CFF}.}
}

@InProceedings{Schiper:1994:PPV,
  author    = {Andr{\'e} Schiper and Alain Sandoz},
  title     = {Primary Partition ``Virtually-synchronous
               Communication'' Harder than Consensus},
  booktitle = pro-wdag94,
  series    = ser-LNCS,
  number    = {857},
  pages     = {39--52},
  year      = {1994},
  annote    = {The authors formally define the primary partition
               virtually synchronous communication problem (PP-VSC)
               and show that it is harder to solve than consensus
               in the sense that PP-VSC is solvable whenever
               consensus is solvable but there are situations where
               consensus is solvable and PP-VSC is not. PP-VSC
               consists of 6 conditions that formalize the
               following intuition: views are sets of processes. In
               PP-VSC every process has the same view $V$ (as
               opposed to the partial VSC problem). Assume that a
               new view $V'$ has to be defined (because a process
               from $V$ is assumed to have crashed for
               example). Then all processes in both $V$ and $V'$
               must have delivered the same set of messages in view
               $V$ before delivering the new view $V'$. The system
               model used is the asynchronous model enhanced with
               failure suspectors as defined by Chandra and Toueg
               \cite{Chandra:1996:UFD}.}
}

@Article{Schiper:1994:SSP,
  author  = {Schiper, Andr{\'e} and Sandoz, Alain},
  title   = {Strong Stable Properties in Distributed Systems},
  journal = j-DC,
  volume  = {8},
  number  = {2},
  pages   = {93--103},
  year    = {1994},
  annote  = {[to read]}
}



@Book{Schuessler:1994:DS,
  author    = {Sch{\"u}{\ss}ler, H. W.},
  title     = {Digitale {Signalverarbeitung}},
  publisher = pub-SV,
  address   = {Berlin},
  year      = {1994},
  annote    = {[Angabe von Armin]}
}

@Article{Schwarz:1994:DCR,
  author  = {Reinhard Schwarz and Friedemann Mattern},
  title   = {Detecting causal relationships in distributed
             computations: in search of the holy grail},
  journal = j-DC,
  volume  = {7},
  pages   = {149--174},
  year    = {1994},
  annote  = {A well written and extensive survey about the
             intrinsic problems in detecting causal relationships
             in distributed systems. First, causality and vector
             time are explained and how both relate to the notion
             of real time. Then implementation aspects of vector
             time are discussed. Next, the authors focus on the
             evaluation of global predicates and show that the
             truth of such a predicate depends on the
             observer. Different modalities of predicates are
             surveyed (including the well known `possibly' and
             `definitely') and a few algorithms for
             predicate detection are presented. The bibliography
             section contains 74 (!) references, so this paper can be
             used as a starting point for one's own research. Overall,
             the authors manage to show that dealing with
             distributed systems is a complex and intriguing
             undertaking.}
}


@Book{Tel:1994:IDA,
  author    = "Tel, Gerard",
  title     = "Introduction to Distributed Algorithms",
  publisher = "Cambridge University Press",
  year      = "1994"
}

@Article{Walther:1994:OPT,
  author  = {Christoph Walther},
  title   = {On Proving the Termination of Algorithms by Machine},
  journal = {Artificial Intelligence},
  volume  = {71},
  pages   = {101--157},
  year    = {1994},
  annote  = {Walther presents a method to prove the termination of a
     class of normal sequential algorithms in a fully automatic
     fashion. The algorithms are formulated in a functional
     programming language and the idea behind this method seems to be
     to derive a well-founded ordering relation on recursive calls by
     some heuristics based on size reduction. His method produces
     hypotheses suitable for proving with an automatic theorem
     prover. The method handles only algorithms that ``strongly''
     terminate (a definition I have not understood) and here not for
     all strongly terminating ones. However, strong termination is a
     practical restriction since all programs that do not have
     recursive calls in the conditions of cases and do not have nested
     recursive calls strongly terminate. The paper contains also an
     overview over older work on (automatic) termination proofs, such
     as a reference to Floyd's idea of termination functions
     \cite{Floyd:1967:AMP}, the first mentioning of the term
     ``convergence function'' \cite{Manna:1974:AAT} and comparison
     work of termination proving methods \cite{Katz:1975:CLT}. }
}



@Book{Yang:1994:GST,
  editor    = "Yang, Zhonghua and Marsland, T. Anthony",
  title     = "Global States and Time in Distributed Systems",
  publisher = "IEEE Computer Society Press",
  year      = "1994",
  annote    = "Contains all the classics on the subject, e.g.
               \cite{Chandy:1985:DSD,Cooper:1991:CDG,Garg:1994:DWU,Mattern:1989:VTG}."
}

@InCollection{??:1995:FLP,
  author    = {???},
  title     = {Summary of the discussion sessions: FLP and real time},
  booktitle = {Theory and Practice in Distributed Systems},
  editor    = {Birman, K. P. and Mattern, F. and Schiper, A.},
  series    = ser-LNCS,
  number    = {938},
  publisher = pub-SV,
  pages     = {260--261},
  year      = {1995},
  annote    = {Summary of a discussion session from the Dagstuhl
               workshop on theory and practice of distributed
               systems. Discusses ways to circumvent the FLP result
               \cite{Fischer:1985:IDC} and the various notions of
               real time in distributed systems. This includes a
               mention of failure detectors, timing assumption
               coverage, real-time scheduling.}
}


@Article{Abadi:1995:CS,
  author   = {Abadi, Mart{\'\i}n and Lamport, Leslie},
  title    = {Conjoining Specifications},
  journal  = j-TOPLAS,
  volume   = {17},
  number   = {3},
  pages    = {507--534},
  month    = may,
  year     = {1995},
  url      = {http://www.acm.org/pubs/toc/Abstracts/0164-0925/201069.html},
  abstract = {We show how to specify components of concurrent
              systems. The specification of a system is the
              conjunction of its components' specifications.
              Properties of the system are proved by reasoning about
              its components. We consider both the decomposition of a
              given system into parts, and the composition of given
              parts to form a system.},
  annote   = {The basis of this and other research
              \cite{Abadi:1993:CS,Lamport:1989:SAS} is that programs and their
              specifications are formulas in a temporal logic (this idea is
              attributed to Pnueli \cite{Pnueli:1981:TSC}). If specifications
              allow stuttering steps, then $A\Rightarrow B$ asserts that $A$
              implements $B$. So checking the correctness of a program can be
              done within the logic. Parallel composition can then be seen as
              conjunction. When dealing with composite systems there are two
              cases to consider: (1) when starting with a composite
              specification $M$ we want to decompose it into ``subcomponents''
              $M_a$ and $M_b$ where $M_a\land M_b \Rightarrow M$. Decomposition
              usually results in slight modifications (due to communication) of
              $M_a$ and $M_b$ resulting in subcomponents $M_a^l$ and $M_b^l$. We
              want to prove that $M_a^l\land M_b^l\Rightarrow M_a\land M_b$, but
              unfortunately this involves reasoning about the full low level
              protocol. Rather we could make use of the fact that we have a
              decomposition and rather prove $M_a^l\Rightarrow M_a$ and
              $M_b^l\Rightarrow M_b$ to prove our result. But this is not always
              valid. The Decomposition Theorem on page 527 states that we can
              deduce $M_a^l\land M_b^l\Rightarrow M_a\land M_b$ from three
              things: (a) $E_a\land M_a^l\Rightarrow M_a$, (b) $E_b\land
              M_b^l\Rightarrow M_b$, and (c) $M_a\land M_b\Rightarrow E_a\land
              E_b$. (2) The second case to consider is when we start with a set
              of subcomponents and want to reason about the specification of the
              composed system. Given two components as an assumption/guarantee
              specification $E_a\Rightarrow M_a$ and $E_b\Rightarrow M_b$, then
              we would like to deduce that the composed system satisfies
              $M_a\land M_b$ if one is taken as the environment of the
              other. This reasoning is however only valid if $E_a$ and $E_b$ are
              safety properties. This fact is discussed more elaborately in
              \cite{Abadi:1993:CS}. The context in which this reasoning is done
              is TLA \cite{Lamport:1994:TLA}.}
}




@InProceedings{Alvarez:1995:ODA,
  author    = {Guillermo A. Alvarez and Flaviu Cristian and Shivakant
               Mishra},
  title     = {On-Demand Asynchronous Atomic Broadcast},
  booktitle = {Proceedings of the 5th IFIP Working Conference on
               Dependable Computing and Critical Applications},
  address   = {Urbana-Champaign, IL},
  month     = sep,
  year      = {1995},
  url       = {ftp://ftp.cs.ucsd.edu/pub/grad/galvarez/papers/ondemand.ps.Z},
  annote    = {Focusses on practical performance issues. [to read]}
}

@InProceedings{Arora:1995:DMF,
  author    = {Arora, Anish and Kulkarni, Sandeep S.},
  title     = {Designing masking fault-tolerance via
               nonmasking fault-tolerance},
  booktitle = pro-srds95,
  pages     = {174--185},
  year      = {1995},
  annote    = {Appeared later in the IEEE Transactions on Software
               Engineering \cite{Arora:1998:DMF}.}
}


@InProceedings{Arora:1995:ECC,
  author    = {Arora, Anish and Gouda, Mohamed},
  title     = {Load balancing: an exercise in constrained convergence},
  booktitle = pro-wdag95,
  pages     = {183--197},
  year      = {1995},
  url       = {ftp://ftp.cis.ohio-state.edu/pub/anish/papers/load-balancing.ps.gz},
  annote    = {Stepwise design of distributed load balancing
               algorithms from specifications using the paradigm of
               constrained convergence.}
}



@InProceedings{Arora:1995:TBS,
  author    = {Anish Arora and David M. Poduska},
  title     = {A timing-based schema for stabilizing information exchange},
  booktitle = {Proceedings of the Third International Conference on
               Computer Networks, Tokyo, Japan},
  year      = {1995},
  annote    = {The authors construct a sequence of increasingly
               complex stabilizing information exchange protocols:
               first a solution for the adjacency problem is given,
               which is extended to a connectivity protocol and
               finally to a general information exchange protocol
               that may be used to detect and establish certain
               predicates in the system. This schema can be used to
               implement commitment, leader election, spanning tree
               construction (i.e., all locally checkable
               specifications). It is closely related to the
               paradigm of information propagation. Interesting is
               the discussion of real time properties: the guarded
               command notation is extended to specify real time
               bounds on actions and methods for specifying and
               proving timeliness properties of algorithms are
               discussed. The underlying system model uses
               synchronized clocks and channels with bounded
               message delay.}
}

@Article{Babaoglu:1995:SVD,
  author   = {Babao{\u{g}}lu, {\"O}zalp and Raynal, Michel},
  title    = {Specification and Verification of Dynamic Properties
              in Distributed Computations},
  journal  = {Journal of Parallel and Distributed Computing},
  volume   = {28},
  number   = {2},
  pages    = {173--185},
  month    = aug,
  year     = {1995},
  keywords = {Boolean algebra; Boolean predicates;
              causality-preserving order; classes; debugging;
              distributed algorithms; distributed applications;
              distributed computations; distributed systems; dynamic
              property specification; dynamic property verification;
              dynamic reconfiguration; formal; global predicate;
              global system states; interval-constrained sequences;
              program; program debugging; program testing; simple
              sequences; specification; verification},
  annote   = {The authors investigate the specification and
              detection of a new class of dynamic properties:
              these are simple sequences (causality preserving
              sequences of global states) and interval-constrained
              sequences (simple sequences with undesired states in
              the middle). They give algorithms that efficiently
              detect these predicates based on the usual
              construction algorithms of the lattice of global
              states \cite{Cooper:1991:CDG}. The paper contains a
              good analysis of the inherent costs of constructing
              the lattice and detecting the predicates and relates
              their (and others') methods to temporal logics. The
              discussion section argues that increased expressive
              power of the observable predicates will always
              result in an increased cost of detecting it,
              however, that the worst case analysis is not very
              realistic since the communication patterns of for
              example programs using RPC result in very lean
              lattices.}
}






@Article{Birman:1995:RTC,
  author  = {Kenneth P. Birman and Bradford B. Glade},
  title   = {Reliability through consistency},
  journal = j-IEEE-SOFTWARE,
  pages   = {29--41},
  month   = may,
  year    = {1995},
  annote  = {This paper argues that consistency is a key to fault
             tolerant applications. In particular, consistent
             failure reporting is important. Different levels of
             consistency are defined (stabilization consistency
             [i.e. the system stabilizes to a consistent state],
             piecewise consistency [i.e. causal consistency], and
             uniform consistency [i.e. atomic
             consistency]). Current systems (such as Unix,
             Chorus, Windows NT, DCE and CORBA, Mach, ISIS and
             others) are assessed for their consistency
             guarantees. Implementation difficulties are discussed.}
}



@Book{Bishop:1995:NNP,
  author    = {Christopher M. Bishop},
  title     = {Neural Networks for Pattern Recognition},
  publisher = {Clarendon Press},
  address   = {Oxford},
  year      = {1995},
  annote    = {[Angabe von Armin]}
}

@TechReport{Chandra:1995:UFD,
  author      = {Chandra, Tushar and Toueg, Sam},
  title       = {Unreliable Failure Detectors for Reliable Distributed
                 Systems},
  institution = {Cornell University, Computer Science Department},
  number      = {TR95-1535},
  month       = aug,
  year        = {1995},
  pages       = {51},
  abstract    = {We introduce the concept of unreliable failure
                 detectors and study how they can be used to solve
                 Consensus in asynchronous systems with crash failures.
                 We characterise unreliable failure detectors in terms
                 of two properties --- completeness and accuracy. We
                 show that Consensus can be solved even with unreliable
                 failure detectors that make an infinite number of
                 mistakes, and determine which ones can be used to solve
                 Consensus despite any number of crashes, and which ones
                 require a majority of correct processes. We prove that
                 Consensus and Atomic Broadcast are reducible to each
                 other in asynchronous systems with crash failures; thus
                 the above results also apply to Atomic Broadcast. A
                 companion paper shows that one of the failure detectors
                 introduced here is the weakest failure detector for
                 solving Consensus [CHT92].}
}



@Article{Charron-Bost:1995:LTP,
  author  = {Bernadette Charron-Bost and Carole Delporte-Gallet
             and Hugues Fauconnier},
  title   = {Local and temporal predicates in distributed systems},
  journal = j-TOPLAS,
  volume  = {17},
  number  = {1},
  pages   = {157--179},
  month   = jan,
  year    = {1995},
  annote  = {This is an intrinsic paper combining general
             knowledge theory and predicate detection in
             distributed systems. The authors re-visit Cooper and
             Marzullo's \cite{Cooper:1991:CDG} predicate
             transformers `possibly' and `definitely',
             investigate their properties and show how they
             relate to the predicate transformer `process p knows
             phi'. They also define the important notion of a
             predicate being local to some process set (i.e. the
             truth value depends only on the local states of that
             set) and show that knowledge is local (i.e. local
             predicates are knowledge predicates and vice
             versa). The results show an interesting analogy
             between knowledge predicates (which are local and
             thus ``spatial'') and the temporal predicates
             `possibly' and `definitely'. Also, a special type of
             predicates (called `observer independent') is
             investigated which are easily detectable: observer
             independent predicates are such for which possibly
             and definitely coincide. They show that a predicate
             which is local to one process is observer
             independent, as well as the disjunction of observer
             independent predicates. Interestingly, these results
             show that ``a process never forgets''. Overall, this
             is a very formal, but nevertheless rewarding article
             offering some surprising insights, but a little
             lengthy missing a few ``real-world'' examples (see
             the article by Halpern and Moses
             \cite{Halpern:1990:KCK} for one with lots of
             examples).}
}

@InProceedings{Chase:1995:EDR,
  author    = {Craig M. Chase and Vijay K. Garg},
  title     = {Efficient Detection of Restricted Classes of Global
               Predicates},
  booktitle = pro-wdag95,
  editor    = {Jean-Michel H{\'e}lary and Michel Raynal},
  series    = ser-LNCS,
  volume    = {972},
  publisher = pub-SV,
  address   = {Le Mont-Saint-Michel, France},
  pages     = {303--317},
  month     = sep,
  year      = {1995},
  ISBN      = {3-540-60274-7},
  annote    = {[to read]}
}


@Article{Cristian:1995:ABF,
  author  = {Cristian, Flaviu and Aghili, Houtan and Strong, Ray and
             Dolev, Danny},
  title   = {Atomic Broadcast: From Simple Message Diffusion to
             {Byzantine} Agreement},
  journal = {Information and Computation},
  volume  = {118},
  number  = {1},
  pages   = {158--179},
  month   = apr,
  year    = {1995},
  annote  = {The authors present three timed atomic broadcast
             algorithms with increasing fault tolerance
             properties: (1) timestamped message diffusion based
             on flooding, tolerant against a limited number of
             crash/omission failures; (2) timestamped message
             diffusion with hop count, tolerant against timing
             failures; (3) timestamped message diffusion with hop
             count and authentication, tolerant against
             authentication detectable Byzantine failures. All
             protocols provide timely dissemination up to network
             partition. Two lower bounds prove that (1) the time
             needed for atomic broadcast to terminate in a
             network of diameter $x$ is $O(x)$ (limited number of
             crash/omission failures, network stays connected);
             (2) any atomic broadcast protocol with $n$
             processors that tolerates $n-2$ authentication
             detectable Byzantine processor failures cannot have
             a termination time smaller than
             $(n-1)\cdot\delta$. Conclusions contain references
             to other work on atomic broadcast and shows the
             alternative between diffusion based and
             acknowledgement based protocols. The authors also
             argue that bounded reaction time is incompatible
             with partitions. The derivational presentation of
             the algorithms reminds of \cite{Hadzilacos:1994:MAF}
             and is very rewarding.}
}







@Article{Dolev:1995:DFC,
  author  = {Danny Dolev and Joseph Y. Halpern and Barbara Simons and
             Ray Strong},
  title   = {Dynamic Fault-Tolerant Clock Synchronization},
  journal = j-ACM,
  volume  = {42},
  number  = {1},
  pages   = {143--185},
  month   = jan,
  year    = {1995},
  annote  = {Proposes a new algorithm for clock synchronization.
    First gives a good overview over other algorithms: mostly they are
    averaging methods requiring $3f+1$ nodes or $2f+1$ if authentication
    is available. There are also phase locking algorithms, where
    nodes periodically broadcast their time and others set their
    clock to that time. Assumptions are bounded drift rate between
    local hardware clocks, and an upper bound on message transmission
    time.  A tolerance specification of linear envelope synchronization
    is given on p. 150.  The algorithm they give is later extended to
    also handle processor joins, it can tolerate any number of faults
    provided the correct processes stay connected.  Overall a very
    rigorous paper, gives a good impression of clock synchronization
    up to today. }
}

@InProceedings{Dolev:1995:SCS,
  author    = {Dolev, Shlomi and Welch, Jennifer L.},
  title     = {Self-stabilizing clock synchronization in the presence
               of {Byzantine} faults},
  booktitle = {Proceedings of the Second Workshop on Self-Stabilizing
               Systems},
  pages     = {9.1--9.12},
  year      = {1995},
  annote    = {It is known that clock synchronization in Byzantine
               environments requires $3f+1$ processors if $f$ is
               the number of faulty processors. Protocols exist for
               these cases. In this paper the authors investigate
               the problem under a more severe failure assumption:
               apart from $f$ processors being faulty, any form of
               transient faults may happen to the system. They
               present two probabilistic protocols that synchronize
               clocks in a system under these assumptions. In
               effect, these protocols are self-stabilizing. The
               protocols cause the local clocks to converge into a
               given margin within time exponential to the total
               number of processes. Because they investigate
               arbitrary transient faults, they also use bounded
               clocks that wrap around periodically. They also
               present an interesting application of the Chinese
               Remainder Theorem for implementing a distributed
               counter.}
}




@InCollection{Echtle:1995:TFT,
  author    = {Echtle, Klaus and Leu, Martin},
  title     = {Test of fault tolerant distributed systems by fault
               injection},
  booktitle = {Fault-Tolerant Parallel and Distributed Systems},
  editor    = {Pradhan, D. and Avresky, D.},
  publisher = pub-IEEE,
  pages     = {244--251},
  year      = {1995},
  annote    = {This paper presents a method to efficiently produce
               test cases for fault injection to test fault
               tolerant algorithms for design faults. Usually, the
               number of test cases is very large, because every
               branch and all paths of a program must be
               explored. Here, the number of test cases is reduced
               by two methods: (1) instead of analyzing the full
               program, an abstraction of it is considered. The
               abstraction is modeled by a timed Petri net and
               omits the description of nodes assumed to be
               faulty. (2) Test cases are generated from this Petri
               net by constructing the reachability graph and
               semiautomatically cutting off paths that are
               semantically unjustified (because for example timing
               assumptions violate the given failure model). The
               resulting test cases are in a sense ``complete'' and
               significantly less than brute force approaches
               yield. It is interesting how the behavior of faulty
               nodes is modeled on the abstraction level: nothing
               can be assumed about their behavior, resembling
               Byzantine behavior. The test cases can subsequently
               be used to test the implemented system and spare the
               developer from tedious full verification of the
               algorithm with all its low-level details. The work
               in this paper is related to ground-breaking work of
               Echtle in 1984 \cite{Echtle:1984:FSV}.}
}

@InProceedings{Fetzer:1995:PCA,
  author    = "Christof Fetzer and Flaviu Cristian",
  title     = "On the possibility of consensus in asynchronous systems",
  booktitle = "Proceedings of the 1995 Pacific Rim International
               Symposium on Fault-Tolerant Systems",
  year      = "1995",
  month     = dec,
  annote    = "The authors show that consensus is possible in the
               timed asynchronous system model together with the
               ``always eventually majority-stable progress
               assumption''. They argue that this adequately reflects
               today's networked workstations, i.e., that today's
               networks are not completely asynchronous. The work
               is related to other work that adds synchrony to the
               time free model, claims to be closest to
               \cite{Dwork:1988:CPP} (the ``global stabilization
               model'') and does not relate in depth to
               \cite{Chandra:1992:WFD} or \cite{Chandra:1991:UFD}
               because ``the model considered there is time-free,
               [it] assumes that properties of failure detectors
               eventually always hold, and [it] does not include
               processor restarts''."
}

@InProceedings{Guerraoui:1995:NBA,
  author    = "Rachid Guerraoui and Mikel Larrea and Andr{\'e} Schiper",
  title     = "Non blocking atomic commitment with an unreliable
               failure detector",
  booktitle = pro-srds95,
  year      = "1995",
  month     = sep,
  annote    = "The authors present a solution to the non-blocking atomic
               commitment problem in asynchronous systems using
               failure detectors. A commit protocol is a consensus
               protocol with favour of abort, and it is
               non-blocking meaning that only all surviving members
               need to commit. The authors adapt Chandra and
               Toueg's consensus algorithm \cite{Chandra:1996:UFD}
               to solve atomic commitment. Necessary prerequisites
               for termination are therefore eventually weak
               failure detectors and a majority of correct processes."
}

@InProceedings{Guerraoui:1995:RRB,
  title =        "Revisiting the Relationship Between Non-Blocking
                 Atomic Commitment and Consensus",
  author =       "Rachid Guerraoui",
  booktitle =    pro-wdag95,
  editor =       "Jean-Michel H{\'e}lary and Michel Raynal",
  address =      "Le Mont-Saint-Michel, France",
  month =        "13--15~" # sep,
  year =         "1995",
  series =       ser-LNCS,
  volume =       "972",
  publisher =    pub-SV,
  ISBN =         "3-540-60274-7",
  pages =        "87--100",
  annote =       "[to read]"
}

@InCollection{Guerraoui:1995:TMV,
  author    = "Rachid Guerraoui and Andr{\'e} Schiper",
  title     = "Transaction model vs virtual synchrony model:
               bridging the gap",
  booktitle = "Theory and Practice in Distributed Systems",
  publisher = pub-SV,
  year      = "1995",
  editor    = "K. P. Birman and F. Mattern and A. Schiper",
  volume    = "938",
  series    = ser-LNCS,
  pages     = "121--131",
  annote    = "[to read]"
}

@InProceedings{Gouda:1995:TTS,
  author    = {Mohamed G. Gouda},
  title     = {The triumph and tribulation of system stabilization},
  booktitle = pro-wdag95,
  year      = {1995},
  pages     = {1--18},
  annote    = {reviews 10 years of stabilization research.}
}


@InCollection{Halpern:1995:RAK,
  author =       "Joseph Y. Halpern",
  title =        "Reasoning about Knowledge: {A} Survey",
  editor =       "D. M. Gabbay and C. J. Hogger and J. A. Robinson",
  booktitle =    "Handbook of Logic in Artificial Intelligence and Logic
                 Programming, Volume 4: Epistemic and Temporal
                 Reasoning",
  pages =        "1--34",
  publisher =    "Oxford University Press",
  year =         "1995",
  annote =       "[to read]"
}

@InProceedings{Isermann:1995:OFL,
  author    = "Rolf Isermann",
  title     = "On Fuzzy Logic Applications for Automatic Control,
               Supervision and Fault Diagnosis",
  booktitle = "Proceedings of the Third European Congress on Intelligent
               Techniques and Soft Computing (EU-FIT)",
  address   = "Aachen",
  year      = "1995",
  pages     = "738--753",
  annote    = "[Angabe von Armin]"
}




@InProceedings{Jegou:1995:LSA,
  author    = {Roland J{\'e}gou and Raoul Medina and Lhouari Nourine},
  title     = {Linear space algorithm for on-line detection of global
               predicates},
  booktitle = {Proceedings of the International Workshop on Structures
               in Concurrency Theory (STRICT)},
  pages     = {175--189},
  year      = {1995},
  editor    = {J{\"o}rg Desel},
  series    = {Workshops in Computing},
  publisher = pub-SV,
  annote    = {}
}

@InProceedings{Julier:1995:NAF,
  author    = "Simon J. Julier and Jeffrey K. Uhlmann and Hugh F.
               Durrant-Whyte",
  title     = "A new approach for filtering nonlinear systems",
  booktitle = "Proceedings of the 1995 American Control Conference",
  address   = "Seattle, WA",
  year      = "1995",
  pages     = "1628--1632",
  annote    = "Presents a method to replace the extended Kalman filter
               \cite{Welch:1995:IKF} by some tricky prediction mechanism
               that doesn't require to calculate Jacobian matrices."
}

@Misc{Ladkin:1995:340,
  author       = {Peter Ladkin},
  title        = {Re: A340 incident at {Heathrow} {(Hatton, RISKS-16.92)}},
  howpublished = {The Risks Digest (Forum on Risks to the Public in Computers
                  and Related Systems)},
  month        = mar,
  volume       = 16,
  number       = 96,
  year         = {1995},
  url          = "http://catless.ncl.ac.uk/Risks",
  annote       = {Describes in detail the Airbus A340 incident at Heathrow
                  in September 1994. During the approach, both display screens
                  in the cockpit went blank and displayed a message ``please
                  wait''. The pilots were still able to fly the plane, but
                  it's somewhat difficult without instrument feedback. The
                  autopilot, which was subsequently switched on, tuned into
                  a ``false glideslope'', a side effect of the radio beam
                  used for landing the aircraft under instrument conditions.
                  This caused the aircraft to fly unusually high pitch rates.
                  The pilots subsequently turned off the autopilot and
                  used a SRA (surveillance radar approach) where the plane
                  is ``talked'' down by the tower. They landed safely. Later
                  the logs of the computer system showed that there had been
                  near-to simultaneous faults in the two redundant flight
                  control systems leading to unexpected behavior (for
                  example, the system also wrongly complained that it
                  was low on fuel). Airbus Industries is said to be
                  aware that there are problems within the redundancy
                  management and that the failure of one computer can cause
                  a failure in the next.}
}


@Article{Lamport:1995:HWP,
  author  = {Leslie Lamport},
  title   = {How to write a proof},
  journal = {American Mathematical Monthly},
  year    = {1995},
  month   = aug # "\slash " # sep,
  volume  = {102},
  number  = {7},
  pages   = {600--608},
  url     = {http://www.research.digital.com/SRC/personal/Leslie_Lamport/proofs/proofs.html},
  annote  = {A way of writing proofs is presented that ``makes it much
             harder to prove things that are not true''. It is a
             structured proof writing method similar to proof trees of
             interactive theorem proving environments. The exposition
             and experience reports with this method are delightful.
             Prior version appeared as DEC SRC Research Report number
             94}
}

@InProceedings{LeLann:1995:ORN,
  author       = {Gerard {Le Lann}},
  title        = {On real-time and non real-time distributed computing},
  booktitle    = pro-wdag95,
  year         = {1995},
  month        = sep,
  pages        = {51--70},
  OPTnumber    = {972},
  OPTseries    = ser-LNCS,
  OPTpublisher = pub-SV,
  OPTnote      = {Invited paper.},
  annote       = {The author explores the relationship between problems
                  in real-time and non real-time computing. The
                  distinction between both classes is that real-time
                  problems have a set of timeliness constraints and
                  their model has additional restrictions on event
                  releases. Timeliness constraints are considered to be
                  a composition of safety and liveness properties. Two
                  examples are discussed: the asynchronous consensus
                  problem (for non real-time) and the hard real-time
                  distributed multiaccess channel problem. Both
                  presentations, especially that of the second example,
                  are intricate and tedious to understand. Finally, the
                  author identifies that timeliness constraints are
                  related to on-line scheduling strategies in the sense
                  that solutions in an asynchronous model may be
                  ``immersed'' into real-time environments by adding
                  special scheduling algorithms. This corresponds to a
                  distinction between design and implementation phases.
                  The paper contains a reference to the distinct phases
                  of diffusion and decision in asynchronous consensus
                  and relates them to knowledge theoretic terms such as
                  partial common knowledge. It also discusses real-time
                  equivalents of an eventually weak failure detector.
                  Overall, a paper with lots of ideas, many typos and
                  typographical shortcomings (obviously hastily
                  produced) and lots of passages which I do not grasp.}
}



@InCollection{Liu:1995:FFF,
  author    = "Zhiming Liu and Mathai Joseph",
  title     = "A formal framework for fault-tolerant programs",
  editor    = "C. M. Mitchell and V. Stavridou",
  booktitle = "Mathematics of Dependable Computing",
  publisher = "Oxford University Press",
  year      = "1995",
  pages     = "131--148",
  annote    = "This is a book chapter incorporating ideas and results
               from a few other papers by the same authors. The
               beginning is the same as in \cite{Liu:1996:VFR}: program
               development is described as a sequence of refinement
               steps; action systems, TLA, specifications and the rest
               of the formalism is introduced. In contrast to
               \cite{Liu:1996:VFR} here also liveness properties are
               studied. Liveness properties result from imposing some
               fairness condition on the specification. Refinement
               mappings are defined along the lines of
               \cite{Lamport:1989:SAS}. Then faults and their effects
               are studied (as in \cite{Liu:1996:VFR}): physical faults
               are modeled as a set of actions which are scheduled
               concurrently with regular program actions, i.e. faults
               are isolated/separated from the program. Then the
               fault-tolerant refinement relation is discussed (like in
               \cite{Liu:1996:VFR}) and the distinction between global
               and local fault assumption (terms are attributed to
               Nordahl's thesis \cite{Nordahl:1992:SDD}). Global fault
               assumptions are always safety properties while local
               fault assumptions are safety and liveness properties
               (specified by state transitions). It is shown (as in
               \cite{Liu:1996:VFR}) that the global fault assumptions
               may be integrated into the fault actions: the
               specification of the fault-affected program is the
               conjunction of an (1) initial property, (2) the state
               transitions of the program and the faults, (3) the fault
               assumption and (4) the fairness property. The safety
               properties (2) and (3) can be encoded in a new state
               transition relation and thus are ``locally
               programmable''. Yes, separating local from global fault
               assumptions makes it easier to specify fault affected
               behaviors. But before proving the fault tolerance, the
               global assumption should be integrated into the
               transition system. Overall this is a version of
               \cite{Liu:1996:VFR} using the same examples but
               discussing liveness issues and not touching real-time. "
}

@Book{Manna:1995:TVR,
  author    = "Zohar Manna and Amir Pnueli",
  title     = "Temporal verification of reactive systems: safety",
  publisher = pub-SV,
  year      = "1995",
  annote    = "See also \cite{Manna:1991:TLR}. Where's liveness?"
}


@InCollection{Mattern:1995:NLI,
  author    = "Friedemann Mattern and Stefan F{\"u}nfrocken",
  title     = "A non-blocking lightweight implementation of causal
               order message delivery",
  booktitle = "Theory and Practice in Distributed Systems",
  publisher = pub-SV,
  year      = "1995",
  editor    = "K. P. Birman and F. Mattern and A. Schiper",
  volume    = "938",
  series    = ser-LNCS,
  pages     = "197--213",
  annote    = "An implementation of causal order delivery using low
               level FIFO buffers. Excludes some computations but
               is very efficient."
}



@Book{Neumann:1995:CRR,
  author    = {Peter G. Neumann},
  title     = {Computer-Related Risks},
  publisher = {ACM Press},
  year      = {1995},
  annote    = {A great collection of computer related incidents from many
               areas (defense, space, aviation, etc.) affecting
               reliability, safety and security together with cause
               analysis and a discussion about technical and social
               countermeasures. A good source of information and also
               a starting point for more because of the good references
               (especially the RISKS forum).}
}


@TechReport{Sabel:1995:EVC,
  author =       "Laura S. Sabel and Keith Marzullo",
  title =        "Election Vs. Consensus in Asynchronous Systems",
  number =       "TR95-1488",
  year =         "1995",
  month =        feb,
  institution =  "Cornell University, Computer Science Department",
  pages =        "9",
  abstract =     "It was shown in 1985 that the {\em Consensus problem}
                 cannot be solved in an asynchronous system if even a
                 single crash failure can occur. In this paper, we show
                 that there are other problems that cannot be solved in
                 an asynchronous system, and for the same intuitive
                 reason: it is impossible to distinguish a very slow
                 processor from a crashed processor. However, these
                 problems are harder than Consensus, in that there are
                 contexts in which Consensus can be solved but these
                 other problems cannot. More precisely, the weakest
                 failure detector that is needed to solve these problems
                 is a Perfect Failure Detector, which is strictly
                 stronger than the weakest failure detector that is
                 needed to solve Consensus. We use a formulation of the
                 Election problem as the prototype for these problems
                 that are harder than Consensus.",
  annote =       "Contains a good and concise definition of failure
                 detectors \`a la Chandra and Toueg
                 \cite{Chandra:1996:UFD} in terms of temporal
                 logic. The proof idea is as follows: a failure
                 detector has very weak completeness iff eventually
                 every process that crashes is suspected at least
                 once by some correct process. The authors then show
                 that (1) strong accuracy and very weak completeness are
                 necessary to solve election, and (2) that both
                 together are sufficient. This shows that a strongly
                 accurate and very weakly complete failure detector
                 is the weakest failure detector necessary for
                 election. Very weak completeness and strong accuracy
                 however suffice to implement a perfect failure
                 detector. Thus the weakest failure detector for
                 election is stronger than the weakest failure
                 detector for consensus. Thus, election is harder
                 than consensus. Other problems as hard as election
                 are primary backup and (probably) terminating
                 reliable broadcast. "
}


@Article{Singhal:1995:OPA,
  author    = {Mukesh Singhal and Friedemann Mattern},
  title     = {An optimality proof for asynchronous recovery
               algorithms in distributed systems},
  journal   = j-IPL,
  year      = {1995},
  month     = aug,
  day       = {11},
  volume    = {55},
  number    = {3},
  pages     = {117--121},
  keywords  = {Algorithms; asynchronous recovery; Asynchronous
               recovery algorithms; Computation theory; Computer
               networks; Computer simulation; Computer system
               recovery; Consistent cut; consistent cut; Data
               communication systems; Data processing; Distributed
               computer systems; distributed processing; distributed
               systems; Internal events; Message receive events;
               Message send events; Optimality proof; optimality
               proof; roll backs; system recovery},
  treatment = {T Theoretical or Mathematical},
  annote    = {[to read]}
}

@InProceedings{Stoller:1995:FPD,
  title =        "Faster Possibility Detection by Combining Two
                 Approaches",
  author =       "Scott D. Stoller and Fred B. Schneider",
  booktitle =    pro-wdag95,
  OPTeditor =    "Jean-Michel H{\'e}lary and Michel Raynal",
  month =        sep,
  year =         "1995",
  OPTseries =    ser-LNCS,
  OPTvolume =    "972",
  OPTpublisher = pub-SV,
  OPTISBN =      "3-540-60274-7",
  pages =        "318--332",
  annote =       "The main contribution of this paper is the best
                 in-depth investigation of the complexity of
                 possibility detection so far. The general algorithms
                 by Cooper and Marzullo \cite{Cooper:1991:CDG} have
                 worst case time complexity of $\Omega(S^N)$ where
                 $N$ is the number of processes and $S$ is the
                 maximum number of relevant events on every
                 process. This is because \emph{every} consistent
                 global state has to be investigated. However, as
                 shown for example by Garg and Waldecker
                 \cite{Garg:1994:DWU}, one can do better for
                 restricted types of predicates. In this paper, the
                 authors show an interesting decomposition property
                 of the set of global consistent states and an
                 application to possibility detection: a state $g$ is
                 globally consistent iff for any subset $F$ of
                 processes (1) the restriction of $g$ to $F$ is
                 concurrent to the restriction of $g$ to the
                 complement of $F$, and (2) the restriction of $g$ to
                 $F$ is a consistent global state in the computation
                 restricted to $F$, and (3) the restriction of $g$ to
                 the complement of $F$ is a consistent global state
                 in the computation restricted to the complement of
                 $F$. The idea now is to reformulate the detection
                 predicate and to specialize it with respect to some
                 subset $F$ of processes. Then, possibly(P) is
                 equivalent to choosing a set $F$ of processes,
                 choosing a consistent global state $g$ of the
                 computation restricted to $F$, and checking whether
                 possibly(P') holds in the computation restricted to
                 the complement of $F$, where P' denotes the
                 predicate P specialized to $g$. (Uff!) Now, having
                 such a fixed set $F$, a standard algorithm for
                 possibility detection can be run in ``smaller''
                 computations, but this has to be done as many times
                 as the computation restricted to $F$ has consistent
                 global states. So, the complexity of the resulting
                 algorithm depends on $|F|$ and is $O(S^{|F|+1})$
                 which is better than usual whenever
                 $|F|<N-1$. However, finding a minimal fixed set is
                 shown to be NP-complete and so only approximations
                 help in general (there is some work to be done
                 here). The authors additionally show that among all
                 formulas equivalent to $P$, the disjunctive normal
                 form (DNF) has minimum cost for possibility detection
                 (every disjunct can be detected separately). A few
                 enhancements are discussed, example applications are
                 given and some funny matrix multiplication method is
                 presented for off-line possibility
                 detection. Finally, a well-written section discusses
                 the inherent complexity of detecting possibly and
                 gives some good references. Overall, this is a paper
                 that at some points supersedes my own abstraction
                 bounds and swims in a theoretical ocean which is
                 very wide."
}



@Article{Verissimo:1995:QSS,
  author  = "Paulo Ver{\'\i}ssimo and Carlos Almeida",
  title   = "Quasi-synchronism: a step away from the traditional
             fault-tolerant real-time system models",
  journal = "Bulletin of the Technical Committee on Operating Systems
             and Application Environments (TCOS)",
  volume  = "7",
  number  = "4",
  pages   = "35--39",
  year    = "1995",
  annote  = "The ideas herein appear in a more general and elaborate
             form in \cite{Almeida:1998:QSA}."
}

@TechReport{Welch:1995:IKF,
  author      = {Greg Welch and Gary Bishop},
  title       = {An Introduction to the {Kalman} filter},
  institution = {University of North Carolina at Chapel Hill,
                 Department of Computer Science},
  year        = {1995},
  number      = {TR 95-041},
  address     = {Chapel Hill, NC 27599-3175},
  annote      = {This paper provides an introduction to the concept
    of a Kalman filter for the non-expert. A Kalman filter can be used
    to estimate the state of a discrete linear process in noisy
    environments. If the process is non-linear, an extended Kalman
    filter is used that assumes linearity on intervals of process
    behavior. The extended Kalman filter requires calculating the
    Jacobian matrix of derivatives of the process modeling function.
    A new approach to filtering nonlinear systems that does not
    require calculating these matrices is described in
    \cite{Julier:1995:NAF}.}
}

@Article{Wiederhold:1995:MIS,
  author  = {Gio Wiederhold},
  title   = {Mediation in Information Systems},
  journal = j-ACM-COMP-SURVEYS,
  year    = {1995},
  month   = jun,
  volume  = {27},
  number  = {2},
  pages   = {265--267},
  url     = {http://www.acm.org/pubs/toc/Abstracts/0360-0300/210390.html},
  annote  = {discusses sensor/actuator approach [to read]}
}

%%% NOTE(review): the citation tag says 1995 but the year field is 1996;
%%% the tag is left unchanged so that existing citations keep resolving.
@InProceedings{Zhou:1995:FNP,
  author =       "Jianying Zhou and Dieter Gollmann",
  title =        "A Fair Non-repudiation Protocol",
  keywords =     "non-repudiation, trusted third party",
  pages =        "55--61",
  booktitle =    "Proceedings of the IEEE Symposium on Security and Privacy",
  address =      "Oakland, CA",
  year =         "1996",
  publisher =    pub-IEEE,
  month =        may,
  organization = "IEEE Computer Society, Technical Committee on Security
                 and Privacy",
  series =       "Research in Security and Privacy",
  annote =       "something like active exchange \cite{Buerk:1990:VES}, [to get]"
}


@InProceedings{Aguilera:1996:RFD,
  title =        "Randomization and Failure Detection: {A} Hybrid
                 Approach to Solve Consensus",
  author =       "Marcos Kawazoe Aguilera and Sam Toueg",
  booktitle =    pro-wdag96,
  editor =       "{\"O}zalp Babaoglu and Keith Marzullo",
  address =      "Bologna, Italy",
  month =        "9--11~" # oct,
  year =         "1996",
  series =       ser-LNCS,
  volume =       "1151",
  publisher =    pub-SV,
  ISBN =         "3-540-61769-8",
  pages =        "29--39",
  annote =       "[to read]"
}



@InProceedings{Almeida:1996:TFD,
  author    = {Carlos Almeida and Paulo Ver{\'\i}ssimo},
  title     = {Timing Failure Detection and Real-Time Group
               Communication in Quasi-Synchronous Systems},
  booktitle = {Proceedings of the 8th Euromicro Workshop on
               Real-Time Systems},
  year      = {1996},
  address   = {L'Aquila, Italy},
  month     = jun,
  annote    = {A newer and more elaborate treatment of this topic
               can be found in \cite{Almeida:1998:QSA}.}
}

@Article{Arora:1996:CSB,
  author      = {Anish Arora and Mohamed G. Gouda and George Varghese},
  title       = {Constraint satisfaction as a basis for designing
                 nonmasking fault-tolerant systems},
  journal     = {Journal of High Speed Networks},
  volume      = {5},
  number      = {3},
  pages       = {293--306},
  year        = {1996},
  OPTcrossref = {Arora:1994:CSB},
  OPTnote     = {A preliminary version appeared at ICDCS94.},
  annote      = {Probably the same as \cite{Arora:1994:CSB} but more
                 citeable.}
}

@InProceedings{Ayache:1996:FMV,
  author =       "S. Ayache and E. Conquet and P. Humbert and C.
                 Rodriguez and J. Sifakis and R. Gerlich",
  title =        "Formal Methods for the Validation of Fault Tolerance
                 in Autonomous Spacecraft",
  pages =        "353--359",
  ISBN =         "0-8186-7261-7",
  booktitle =    pro-ftcs96,
  month =        "25--27~" # jun,
  publisher =    "IEEE",
  address =      "Washington",
  year =         "1996",
  annote =       "[to read]"
}


@Article{Babaoglu:1996:UFS,
  author  = "{\"O}zalp Babao{\u{g}}lu and Eddy Fromentin and
             Michel Raynal",
  title   = "A unified framework for the specification and
             run-time detection of dynamic properties in
             distributed computations",
  journal = "Journal of Systems and Software",
  year    = "1996",
  volume  = "33",
  pages   = "287--298",
  annote  = "The authors present a general framework with which
             to detect a large class of properties of distributed
             computations. Abstractly, property detection can be
             seen as searching through an evolving directed
             acyclic graph (DAG) with labelled nodes. If the
             nodes carry labels according to specific properties,
             the detection problem can be formulated as an
             instance of the language recognition problem. This
             counts for all properties that are expressible as
             regular languages. The framework can be used to
             detect properties of computations based on sequences
             of local states (control flows). It can also be used
             to detect properties defined on sequences of
             consistent global states. Thus, the method is a
             generalization of the property detection approaches
             of Cooper and Marzullo \cite{Cooper:1991:CDG} for
             possibly and definitely
             \cite{Babaoglu:1993:CGS,Schwarz:1994:DCR}. The
             detection methods are based on mapping an accepting
             automaton onto the nodes. For properties of control
             flow it suffices to add an array of bits (of the
             order of the set of states of the accepting
             automaton) to every node and message and have a
             distributed controller running and updating the
             array at every node. If the larger class of
             properties on sequences of global states is to be
             detected, the authors employ a central monitoring
             process (like in \cite{Cooper:1991:CDG}) that
             incrementally constructs the lattice of consistent
             states. The nodes of the lattice are an array of
             bits (one for every state of the accepting
             automaton). While the previous approach is
             practically feasible and implemented (in the EREBUS
             distributed debugger process mentioned in the
             article), the detection of sequences of global
             states seems to be infeasible. However, properties
             on single global states (like possibly and
             definitely) do not need the expressibility of
             regular languages and so detecting these sorts of
             predicates can be feasible in practice. The authors
             see their method as a sort of on-the-fly model
             checking that has no idea of the model they are
             checking against. Overall, I like this paper very
             much: it is concise and mathematically sound, uses a
             minimal set of examples and strives for theoretical
             excellence."
}

@Book{Barbosa:1996:IDA,
  author =       "Valmir C. Barbosa",
  title =        "An Introduction to Distributed Algorithms",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "1996",
  keywords =     "book, text, parallel processing, supercomputers,
                 computer algorithms"
}

@TechReport{Basu:1996:SPP,
  title =        "Solving Problems in the Presence of Process Crashes
                 and Lossy Links",
  author =       "Anindya Basu and Bernadette Charron-Bost and Sam
                 Toueg",
  year =         "1996",
  month =        sep,
  pages =        "30",
  institution =  "Cornell University, Computer Science Department",
  number =       "TR96-1609",
  abstract =     "We study the effect of link failures on the
                 solvability of problems in asynchronous systems that
                 are subject to process crashes: given a problem that
                 can be solved in a system with process crashes and
                 reliable links, is the problem solvable even if links
                 are lossy? We answer this question for two types of
                 lossy links, and show that the answer depends on the
                 maximum number of processes that may crash and the
                 nature of the problem to be solved. In particular, we
                 prove that the answer is positive if fewer than half of
                 the processes may crash or if the problem specification
                 does not refer to the state of processes that crash.
                 However, in general, the answer is negative even if
                 each link can lose only a finite number of
                 messages.",
  annote =       "A shorter version appeared at WDAG-10
                 \cite{Basu:1996:SRL} which is summarized there and
                 not in this bibliographic entry."
}



@InProceedings{Basu:1996:SRL,
  author    = {Anindya Basu and Bernadette Charron-Bost and Sam Toueg},
  title     = {Simulating reliable links with unreliable links in
               the presence of process crashes},
  booktitle = pro-wdag96,
  pages     = {105--122},
  publisher = pub-SV,
  address   = {Bologna, Italy},
  month     = oct,
  year      = {1996},
  annote    = {The authors investigate the question, what problems
               that are solvable with reliable links and possible
               process crashes remain solvable in the presence of
               unreliable links. They investigate two types of
               unreliable links: eventually reliable (there is a
               time after which the link becomes reliable = finite
               message loss), and fair lossy (if an infinite number
               of messages is sent over a channel, then an infinite
               number of messages is received at the other end =
               infinite message loss). Intuitively, a reliable link
               is also eventually reliable, and an eventually
               reliable link is also fair lossy. The authors show
               two things: (1) there are problems (e.g. uniform
               reliable broadcast) that are solvable with reliable
               channels but are not solvable with eventually reliable
               channels. This means that, in general, eventually
               reliable links cannot simulate reliable links. (2)
               if the majority of processes in the system is
               correct, then fair lossy links can simulate reliable
               links. The key idea behind this fact is that
               processes must infinitely often diffuse their
               message histories. This is however very inefficient
               (requires unbounded storage capacity in nodes and
               unbounded message length). In general, this is a
               paper which reveals again the importance of
               correct-restricted problems (problems in which only
               correct processes are required to do something),
               because correct-restricted problems remain solvable
               even with fair lossy links.}
}



@InProceedings{Beauquier:1996:MFH,
  author =       {Joffroy Beauquier and Synn{\"o}ve Kekkonen},
  title =        {Making {FTSS} is hard},
  booktitle =    {Proceedings of the International Conference on Software
                  Engineering (ICSE'96)},
  OPTcrossref =  {},
  OPTkey =       {},
  pages =        {91--96},
  year =         {1996},
  OPTeditor =    {},
  OPTvolume =    {},
  OPTnumber =    {},
  OPTseries =    {},
  address =      {Las Vegas, USA},
  month =        jul,
  OPTorganization = {},
  OPTpublisher = {},
  OPTnote =      {},
  annote =       {Possibly similar to brief announcement
                  \cite{Beauquier:1997:OFS}. See also
                  \cite{Beauquier:1997:FTS,Kekkonen:1998:RFA}.}
}

@InProceedings{Camp:1996:AAT,
  author =       {Jean Camp and Michael Harkavy and J. D. Tygar and
                  Bennet Yee},
  title =        {Anonymous atomic transactions},
  booktitle =    {Proceedings of the 2nd USENIX Workshop on Electronic
                  Commerce},
  OPTcrossref =  {},
  OPTkey =       {},
  pages =        {123--133},
  year =         {1996},
  OPTeditor =    {},
  OPTvolume =    {},
  OPTnumber =    {},
  OPTseries =    {},
  OPTaddress =   {},
  month =        nov,
  OPTorganization = {},
  OPTpublisher = {},
  OPTnote =      {},
  annote =       {[to write]}
}



@InProceedings{Chandra:1996:IGM,
  author =       "Tushar Deepak Chandra and Vassos Hadzilacos and Sam
                 Toueg and Bernadette Charron-Bost",
  title =        "On the Impossibility of Group Membership",
  pages =        "322--330",
  booktitle =    pro-podc96,
  ISBN =         "0-89791-800-2",
  month =        may,
  publisher =    "ACM",
  address =      "New York, USA",
  year =         "1996",
  OPTnote =         "Also published as Technical Report TR95-1548,
                  Cornell University.",
  annote =       "The authors show, that the problem of weak group
                  membership (WGM) is impossible in the asynchronous
                  system model used by Fischer et al. in their famous
                  impossibility proof of consensus
                  \cite{Fischer:1985:IDC}. WGM is defined having two
                  properties: (liveness) if processes want to leave the
                  group, at least one other process must install a new
                  view of the group and no process installs a view
                  different from it; (safety) it must be possible that the
                  new view installed is correct. Impossibility of WGM
                  is especially noteworthy because it is at the core
                  of many group communication systems (e.g. Isis and
                  Transis). That's what makes this paper worthwhile
                  citing."
}

@Article{Chandra:1996:UFD,
  author  = {Tushar Deepak Chandra and Sam Toueg},
  title   = {Unreliable failure detectors for reliable
             distributed systems},
  journal = j-ACM,
  volume  = {43},
  number  = {2},
  pages   = {225--267},
  month   = mar,
  year    = {1996},
  annote  = {Journal version of \cite{Chandra:1991:UFD}.}
}



@Article{Chandra:1996:WFD,
  author  = {Tushar Deepak Chandra and Vassos Hadzilacos and Sam
             Toueg},
  title   = {The weakest failure detector for solving consensus},
  journal = j-ACM,
  volume  = {43},
  number  = {4},
  pages   = {685--722},
  month   = jul,
  year    = {1996},
  annote  = {Journal version of \cite{Chandra:1992:WFD}.}
}





@InProceedings{Charpentier:1996:ACR,
  author =       "Michel Charpentier and Mamoun Filali and Philippe
                  Mauran and G{\'e}rard Padiou and Philippe Qu{\'e}innec",
  title =        "Abstracting communication to reason about
                  distributed algorithms",
  OPTcrossref =  "",
  OPTkey =       "",
  OPTeditor =    "",
  OPTvolume =    "",
  OPTnumber =    "",
  OPTseries =    "",
  pages =        "89--103",
  booktitle = pro-wdag96,
  year =         "1996",
  OPTorganization = "",
  OPTpublisher = "",
  OPTaddress =   "",
  OPTmonth =     "",
  OPTnote =      "",
  annote =       "Concurrent programming formalisms like UNITY are
                  often based on locally shared variables as
                  communication primitives. While it is possible to
                  extend these models via definitions with message
                  passing primitives this is quite cumbersome. In this
                  paper the authors propose an abstraction of
                  communication based on observations: variable $x$ observes
                  $y$ if updates of $x$ reflect all the updates of $y$
                  but not necessarily in a timely manner, i.e.,
                  eventually $x$ will take on all values of $y$ in the
                  original order. Using this observation relation on
                  variables, the authors present inference rules for
                  the UNITY framework that can be used to prove that
                  distributed algorithms have certain properties. The
                  observation relation is interesting because it
                  abstracts from communication and a communication
                  topology and thus acts like a transport layer of
                  communication subsystems. No relations to knowledge
                  based protocol formalisms are discussed, although
                  they seem to have fundamental similarities."
}


@Article{Charron-Bost:1996:SAC,
  author  = "Bernadette Charron-Bost and Friedemann Mattern and
             Gerard Tel",
  title   = "Synchronous, asynchronous, and causally ordered
             communication",
  journal = j-DC,
  volume  = "9",
  pages   = "173--191",
  year    = "1996"
}


@InProceedings{Cristian:1996:GMS,
  author =       "Flaviu Cristian",
  title =        "Group, Majority, and Strict Agreement in Timed
                 Asynchronous Distributed Systems",
  pages =        "178--189",
  ISBN =         "0-8186-7261-7",
  booktitle =    "Proceedings of the Twenty-Sixth International
                 Symposium on Fault-Tolerant Computing",
  month =        "25--27~" # jun,
  publisher =    "IEEE",
  address =      "Washington",
  year =         "1996",
  annote =       "[to read]"
}

@Article{Cristian:1996:SAG,
  author  = {Flaviu Cristian},
  title   = {Synchronous and Asynchronous Group Communication},
  journal = j-CACM,
  year    = {1996},
  month   = apr,
  volume  = {39},
  number  = {4},
  pages   = {88--97},
  subject = {{\bf H.5.3}: Information Systems, INFORMATION
             INTERFACES AND PRESENTATION, Group and Organization
             Interfaces, Asynchronous interaction. {\bf H.5.3}:
             Information Systems, INFORMATION INTERFACES AND
             PRESENTATION, Group and Organization Interfaces,
             Synchronous interaction.},
  annote  = {[to read]},
}

@InProceedings{Dega:1996:RMA,
  author =       "Jean-Louis Dega",
  title =        "The redundancy mechanisms of the {Ariane} 5
                  operational control center",
  OPTcrossref =  "",
  OPTkey =       "",
  OPTeditor =    "",
  OPTvolume =    "",
  OPTnumber =    "",
  OPTseries =    "",
  pages =        "382--386",
  booktitle = pro-ftcs96,
  year =         "1996",
  OPTorganization = "",
  OPTpublisher = "",
  OPTaddress =   "",
  OPTmonth =     "",
  OPTnote =      "",
  annote =       "Dega reports on details of the ground control system
                  of the Ariane 5 project. The system is a fully
                  distributed real-time system built on top of an
                  off-the-shelf real-time operating system. It
                  controls the countdown procedure until 3 seconds
                  before the launch. The main system components are
                  duplicated twice (using hot/warm standby) and can be
                  repaired on-line. The main dependability requirement
                  is to be fail-safe, i.e. in case of critical
                  failures the system should stop in a safe state. The
                  probability of a serious event occurring during was
                  aimed to be $10^{-6}$. Main failure detection
                  functions are performed by the components themselves
                  (self-checking). The time constraints to hand over
                  from active to standby machine are less than 300
                  ms. The design of the system is another example for
                  safety being more important than liveness in
                  practical systems (see \cite{Kreitz:1998:SWL}). See
                  also the notes on redundancy in a report on current
                  NASA work \cite{Marcopulos:1998:FBC}."
}




@TechReport{Dolev:1996:FDO,
  author      = {Danny Dolev and Roy Friedman and Idit Keidar and
                 Dahlia Malkhi},
  title       = {Failure detectors in omission failure environments},
  institution = {Cornell University, Computer Science Department},
  number      = {TR96-1608},
  month       = sep,
  year        = {1996},
  annote      = {[to read] studies also partitions, surveyed in
                 \cite{Aguilera:1998:FDC}. Published as a brief
                 announcement at PoDC 97 \cite{Dolev:1997:FDO}.}
}



@Misc{ESA:1996:A5F,
  author       = "{European Space Agency}",
  title        = "ARIANE 5 Flight 501 Failure",
  howpublished = "\url{http://www.esrin.esa.it/htdocs/tidc/Press/Press96/ariane5rep.html}",
  note         = "Report by the Inquiry Board",
  month        = jul,
  year         = "1996"
}

@inproceedings{Fetzer:1996:FAT,
   author    = {Christof Fetzer and Flaviu Cristian},
   title     = {Fail-Awareness in Timed Asynchronous Systems},
   booktitle = pro-podc96,
   year      = {1996},
   month     = may,
   address   = {Philadelphia},
   pages     = {314--321a},
   note      = {\url{http://www-cse.ucsd.edu/users/cfetzer/FA/fa.html}},
   annote = "Shows how to transform a synchronous specification $S$
     into a weakened specification $F$ which is implementable in
     timed-asynchronous systems. A synchronous specification is one
     which prescribes a real-time deadline for completion of the
     service. See \cite{Cristian:1999:TAD} for an explanation of
     the timed asynchronous system model."
}



@InProceedings{Fetzer:1996:FFD,
  author =       "Christof Fetzer and Flaviu Cristian",
  title =        "Fail-Aware Failure Detectors",
  pages =        "200--209",
  booktitle =    "Proceedings of the 15th Symposium on Reliable
                 Distributed Systems ({SRDS} 1996)",
  ISBN =         "0-8186-7481-4",
  month =        oct,
  publisher =    "IEEE Computer Society Press",
  address =      "Los Alamitos, Ca., USA",
  year =         "1996",
  annote =       "The authors report on the contradiction that
                  election has been proved requiring a perfect failure
                  detector \cite{Sabel:1995:EVC} but election seems
                  implementable in existing asynchronous systems
                  \cite{Fetzer:1995:PCA}. To resolve this
                  contradiction, they introduce a new class of
                  fail-aware failure detectors which together with
                  certain progress assumptions \cite{Fetzer:1995:PCA}
                  are sufficient to solve election and are
                  implementable in timed asynchronous systems
                  \cite{Cristian:1998:TAS}. Fail-aware failure detectors
                  are based on the idea that a process suspects itself
                  immediately if it is suspected by another process
                  (strong fail-awareness) or by a majority of other
                  processes (weak fail-awareness). Together with the
                  strong completeness and eventual weak accuracy of
                  Chandra and Toueg \cite{Chandra:1996:UFD} both
                  attributes are sufficient to solve election in
                  asynchronous systems. The failure detectors have
                  infinite output domains and thus resemble very much
                  those of \cite{Aguilera:1997:HTF}. The
                  fail-awareness property can be implemented in timed
                  asynchronous systems only when making progress
                  assumptions \cite{Fetzer:1995:PCA} which assume
                  strict synchrony for a ``sufficiently long'' period
                  of time. Also, these are the first failure detectors
                  that actually reference real time in their
                  specifications, which is a little confusing when
                  designing algorithms for the time-free
                  model. However, exact formal definitions in terms of
                  \cite{Chandra:1996:UFD} are given but the full
                  consequences of the definition in terms of whether
                  ``new'' and previously undiscovered features are
                  added are not discussed in depth."
}

@Book{Gabriel:1996:POS,
  author =       "Richard P. Gabriel",
  title =        "Patterns of {Software}. {Tales} from the {Software}
                  {Community}",
  publisher =    "Oxford University Press",
  year =         "1996",
  OPTcrossref =  "",
  OPTkey =       "",
  OPTeditor =    "",
  OPTvolume =    "",
  OPTnumber =    "",
  OPTseries =    "",
  address =      "New York, Oxford",
  OPTedition =   "",
  OPTmonth =     "",
  OPTnote =      "",
  annote =       "This is a striking, sometimes shocking and sometimes
                  amusing collection of essays by a man who (as one of
                  the developers of Lisp) has closely been related to
                  the emergence of high level programming languages
                  and the entire software engineering discipline for
                  about 20 to 30 years. Gabriel not only gives
                  inspiring insights into the benefits of small
                  systems ({\`a} la Siefkes), good documentation ({\`a}
                  la Knuth's literate programming) and what makes a
                  programming language good vs. widely accepted, but
                  also tells instructive tales about the rise and fall
                  of his own company, Lucid, during the 90s. A well
                  readable book written in almost spoken language and
                  with sometimes a little ``diffusing'' sequences of
                  ideas, but with a lot of perfectly arguable points,
                  which makes this book a good starting point for
                  discussions on software engineering (along with
                  Brooks's ``No Silver Bullet'' \cite{Brooks:1987:NSB})."
}



@Article{Garg:1996:DSU,
  author  = "V. K. Garg and Brian Waldecker",
  title   = "Detection of strong unstable predicates in
             distributed programs",
  journal = "IEEE Transactions on Parallel and Distributed Systems",
  volume  = "7",
  number  = "12",
  pages   = "1323--1333",
  year    = "1996",
  annote  = "Angaben aus \cite{Stoller:1997:DGP}."
}

@Book{Garg:1996:PDS,
  author    = "Vijay K. Garg",
  title     = "Principles of Distributed Systems",
  publisher = "Kluwer Academic Publishers",
  address   = "Boston, MA",
  year      = "1996"
}

@InProceedings{Guerraoui:1996:CS,
  author =       "R. Guerraoui and A. Schiper",
  title =        "Consensus Service: {A} Modular Approach for Building
                 Agreement Protocols in Distributed Systems",
  pages =        "168--177",
  ISBN =         "0-8186-7261-7",
  booktitle =    pro-ftcs96,
  month =        "25--27~" # jun,
  publisher =    "IEEE",
  address =      "Washington",
  year =         "1996",
  annote =       "[to read]"
}

@InProceedings{Guerraoui:1996:GAF,
  title =        "``{Gamma}-Accurate'' Failure Detectors",
  author =       "Rachid Guerraoui and Andr{\'e} Schiper",
  booktitle =    "Distributed Algorithms, 10th International Workshop,
                 {WDAG} '96",
  editor =       "{\"O}zalp Babaoglu and Keith Marzullo",
  address =      "Bologna, Italy",
  month =        "9--11~" # oct,
  year =         "1996",
  series =       "Lecture Notes in Computer Science",
  volume =       "1151",
  publisher =    pub-SV,
  ISBN =         "3-540-61769-8",
  pages =        "269--286",
  annote =       "[to read]"
}

@PhdThesis{Hoefling:1996:MFP,
  author =       {T. H{\"o}fling},
  title =        {{Methoden zur Fehlererkennung mit Parametersch\"atzung und
                  Parit\"atsgleichungen}},
  school =       {Technische Hochschule Darmstadt},
  year =         {1996},
  OPTkey =       {},
  OPTtype =      {},
  OPTaddress =   {},
  OPTmonth =     {},
  OPTnote =      {{erschienen als Fortschr. Ber. VDL, VDI-Verlag,
                  D\"usseldorf}},
  OPTannote =    {}
}

@InProceedings{Hurfin:1996:ODC,
  author    = {Michel Hurfin and M. Mizuno and Michel Raynal and
               M. Singhal},
  title     = {On-the-Fly Detection of Conjunctions of Local
               Predicates in Distributed Computations},
  booktitle = {Eighth {IEEE} Symposium on Parallel and Distributed
               Processing ({SPDP}'96)},
  pages     = {589--592},
  publisher = {IEEE Computer Society},
  address   = {Washington},
  month     = oct,
  year      = {1996},
  ISBN      = {0-8186-7683-3},
  annote    = {[to get]}
}



@Article{Hutter:1996:VSE,
  author =       {Dieter Hutter and Bruno Langenstein and Claus Sengler
                  and J{\"o}rg H. Siekmann and Werner Stephan and
                  Andreas Wolpers},
  title =        {Verification Support Environment {(VSE)}},
  journal =      {High Integrity Systems},
  year =         {1996},
  OPTkey =       {},
  volume =       {1},
  number =       {6},
  pages =        {523--530},
  OPTmonth =     {},
  OPTnote =      {},
  annote =       {Gives an overview of VSE-I. Good reference. For VSE-II,
                  better cite \cite{Hutter:1998:VSE}.}
}

@Article{Isermann:1996:MUE,
  author  = "Rolf Isermann",
  title   = "{Modellgest\"utzte \"Uberwachung und Fehlerdiagnose
             technischer Systeme}",
  journal = "Automatisierungstechnische Praxis",
  volume  = "38",
  pages   = "9--20, 48--57",
  year    = "1996",
  annote  = "[Angabe von Armin]"
}

@phdthesis{Janowski:1996:BAF,
        author = "T. Janowski",
         title = "Bisimulation and Fault-Tolerance",
        number = "CS-THESIS-JANOWSKI96",
          year = "1996",
         month = feb,
          type = "Thesis",
           url = "http://www.dcs.warwick.ac.uk/pub/reports/theses/jan96.html",
   school = "Department of Computer Science, University of Warwick",
       address = "Coventry, UK",
          note =  {Also University of Warwick Department of Computer
                   Science Research Report CS-RR-300},
      abstract = { In the area of concurrent, communicating systems, a
      common approach to verify the absence of design faults is in
      terms of an equivalence relation between a high-level and a
      low-level process. One such relation is bisimulation and this
      holds if two processes cannot be distinguished by observing them
      for a finite interval of time. However, the absence of design
      faults does not guarantee that the process will behave correctly
      in practice as it depends on various hardware devices which may
      be subject to physical faults themselves. Such faults cannot be
      avoided; they must be tolerated. The purpose of this thesis is
      to provide a formal framework, based on bisimulations and using
      the Calculus of Communicating Systems, by which we can specify,
      design and verify concurrent, fault-tolerant systems, with
      emphasis placed on reasoning and design under weak assumptions
      about faults.  },
  annote = {[to get, requested from Warwick]}
}


@InProceedings{Liu:1996:VFR,
  author =       "Z. Liu and M. Joseph",
  title =        "Verification of Fault Tolerance and Real Time",
  pages =        "220--229",
  ISBN =         "0-8186-7261-7",
  booktitle =    pro-ftcs96,
  month =        jun,
  publisher =    "IEEE",
  address =      "Sendai, Japan",
  year =         "1996",
  annote = "Programs and specifications are viewed as formulas in the
  same logic (originally an idea of \cite{Pnueli:1981:TSC} explained
  in \cite{Abadi:1995:CS,Abadi:1993:CS,Lamport:1989:SAS}). The logic
  used here is TLA \cite{Lamport:1994:TLA} and the programming
  notation are action systems (i.e. simple automata). Both formalisms
  are related and it is shown how to transform action systems into
  TLA. Here, only TLA formulas are studied which are safety
  properties, i.e. only safety properties of action systems are
  discussed.  The development of a program $P$ from a specification
  $S$ can be viewed as a sequence of refinement steps $P<P_n<\ldots
  P_1=S$ starting with $S$ and ending with $P$ where in each step the
  lower level version of the program is shown to implement the higher
  level version (this is done using some refinement calculus,
  e.g. \cite{Abadi:1991:ERM}). A program $P$ which implements $S$ in
  fault-free operations may not do so in the presence of physical
  faults. Faults are modeled as a set $f$ of fault operations on the
  system state and the effect of faults is viewed as a transformation
  $F(P,f)$ which is an interleaved execution of $P$ and $f$. The
  transformed program is called the ``$f$-affected'' version of
  $P$. If the $f$-affected version of $P$ satisfies some property $q$
  and $q$ is the specification of some program $P'$ then $P$ is the
  $f$-tolerant refinement of $P'$, denoted $P<_f P'$. The relation
  $<_f$ is stronger than the ordinary refinement relation $<$ and
  generally is not reflexive (why?). But it is somewhat transitive: if
  $P_1<_{f_1}P_2$ and $P_2<_{f_2}P_3$ then $P_1<_{f_1}P_3$ ! Apart
  from an action set $f$ a fault model requires a behavioral
  specification called ``behavioral fault assumption''. This is
  analogous to the rely specification of
  \cite{Voelzer:1998:VFT}. Generally, this is a safety property (as
  conjectured in \cite{Gaertner:1999:ESD}) so it can be
  ``implemented'' within $f$. Separation of fault actions and
  behavioral fault assumption usually makes specification
  easier. Proving the fault tolerance properties of some program
  results in proving that a program is a fault tolerant refinement of
  another. Real time is basically handled by adding a clock and
  formulating real time properties as safety properties. Section 5
  discusses related work: \cite{Liu:1992:TPF} presents methods how to
  obtain fault-tolerant refinements of programs, other work is
  \cite{Liu:1994:SDF}. It is noted that these methods can be used to
  prove fault-tolerant algorithms using PVS. Transformational
  approaches are independent of formalism (\cite{Nordahl:1993:DFD}
  uses CSP, \cite{Janowski:1996:BAF} uses CCS)."

}

@Book{Lynch:1996:DA,
  author =       {Nancy Lynch},
  title =        {Distributed Algorithms},
  publisher =    {Morgan Kaufmann},
  address =      {San Mateo, CA},
  year =         {1996}
}



@INPROCEEDINGS{Mizuno:1996:TBT,
        AUTHOR = "Masaaki Mizuno and Hirotsugu Kakugawa",
        TITLE = "A timestamp based transformation of self-stabilizing
         programs for distributed computing environments",
        BOOKTITLE = pro-wdag96,
        YEAR = 1996,
        PAGES = "304--321",
        annote = "In the serial model, an atomic execution step
                  consists of a read-sub-step, where processes read
                  the state of their neighbours, followed by a local
                  state change. Each process can always see the states
                  of one of its neighbours and only one process at a
                  time executes an atomic step. In the distributed
                  model, an atomic execution step is either a
                  read-sub-step or a local state change based on its
                  own state and the locally recorded neighbours'
                  states. In this paper the authors present a method
                  to transform an algorithm from the serial model to
                  an algorithm from the distributed model and show
                  that the self-stabilization property is preserved
                  during transformation. The idea of the scheme is to
                  simulate the serial model by imposing a
                  transaction-commit protocol on every execution step
                  of the original algorithm. As an execution step
                  corresponds to a transaction, a lot of theorems from
                  serializability theory may be applied. The criterion
                  to prove serializability of the transformed program
                  is based on timestamps from Lamport logical
                  clocks. Correctness and message complexity depend
                  on the usual prerequisites of reaching consensus and
                  the careful choice of timeout values."
}



@inproceedings{Owre:1996:PVS,
        TITLE = {{PVS}: Combining Specification, Proof Checking, and
                Model Checking},
        AUTHOR = {S. Owre and S. Rajan and J. M. Rushby and N. Shankar
                and M. K. Srivas},
        BOOKTITLE = {Computer-Aided Verification, CAV '96},
        EDITOR = {Rajeev Alur and Thomas A. Henzinger},
        PAGES = {411--414},
        PUBLISHER = pub-SV,
        SERIES = {Lecture Notes in Computer Science},
        NUMBER = 1102,
        MONTH = jul # "/" # aug,
        YEAR = 1996,
        ADDRESS = {New Brunswick, NJ}
}





@Book{Spies:1996:FSS,
  ALTauthor =    {},
  editor =       {Katharina Spies and Manfred Broy and Stephan Merz},
  title =        {Formal Systems Specification: The RPC-Memory Specification
                  Case Study},
  publisher =    pub-SV,
  year =         {1996},
  OPTkey =       {},
  OPTvolume =    {},
  number =       {1169},
  series =       ser-LNCS,
  OPTaddress =   {},
  OPTedition =   {},
  month =        dec,
  OPTnote =      {},
  annote =       {A collection of papers from a Dagstuhl seminar 9439 in
                  1994 where a sample problem is specified and verified
                  in many different formalisms. There's also a Dagstuhl
                  report with abstracts on this.}
}

@Book{Tanenbaum:1996:CN,
  author    = {Andrew S. Tanenbaum},
  title     = {Computer Networks},
  edition   = {Third},
  publisher = pub-PH,
  address   = pub-PH:adr,
  year      = {1996},
  annote    = {The well-known bestseller.}
}




@InProceedings{Vogels:1996:WWF,
  author =       {Werner Vogels},
  title =        {World Wide Failures},
  booktitle =    {Proceedings of the ACM SIGOPS European Workshop},
  OPTcrossref =  {},
  OPTkey =       {},
  OPTpages =     {},
  year =         {1996},
  OPTeditor =    {},
  OPTvolume =    {},
  OPTnumber =    {},
  OPTseries =    {},
  address =      {Connemara, Ireland},
  month =        sep,
  OPTorganization = {},
  OPTpublisher = {},
  OPTnote =      {},
  annote =       {Discusses practical concerns in building reliable failure
                  detectors. Explicitly references \cite{Chandra:1996:UFD}
                  and presents some timeout measurements. It is said that
                  a full paper with detailed results will appear soon.
                  Has it already? A good complement to
                  \cite{Sergent:1999:FDI}.}
}

@InProceedings{Zhou:1996:FNP,
  author =       "Jianying Zhou and Dieter Gollmann",
  title =        "A Fair Non-repudiation Protocol",
  keywords =     "non-repudiation, trusted third party",
  pages =        "55--61",
  booktitle =    "Proceedings of the IEEE Symposium on Research in Security
                 and Privacy",
  address =      "Oakland, CA",
  year =         "1996",
  publisher =    pub-IEEE,
  month =        may,
  organization = "IEEE Computer Society, Technical Committee on Security
                 and Privacy",
  annote =       "[to get]"
}

@InProceedings{Afek:1997:LS,
  author =       "Yehuda Afek and Shlomi Dolev",
  title =        "Local Stabilizer",
  booktitle =    pro-podc97,
  pages =        "287--?",
  year =         "1997",
  annote =       "The authors present a protocol module which can be
                  imposed onto arbitrary round based algorithms and
                  turns them into self-stabilizing algorithms. This is
                  done much in the spirit of Katz and Perry
                  \cite{Katz:1993:SEM} by using a detection protocol
                  and a repair protocol. The detection protocol part
                  sends the complete state of a node to all its
                  neighbours in every round. After $d$ rounds, and by
                  forwarding states from neighbours, every node will
                  be able to construct a pyramid of local snapshots of
                  all nodes within diameter $d$ of itself. Level $k$
                  of the pyramid reflects the state of $k$-distant
                  node before $k$ rounds. Not only the state is
                  forwarded, but also the inputs to the node before
                  that round, so a remote node can check what the node
                  in question was supposed to be doing and can detect
                  inconsistencies immediately. On detecting an
                  inconsistency, the repair mechanism freezes the
                  outer network and diffuses the ``right'' state to
                  all processes within the infected portion of the
                  network. In case this is not possible (because the
                  majority of nodes has been perturbed for example), a
                  reset procedure is invoked. This paper contains some
                  very clever ideas that have a lot of potential for
                  optimization. The pyramid of states however and
                  checking the consistency implies that every node
                  does all the computations of all other
                  nodes. Together with the round based model this
                  implements an omniscient observer at every node that
                  takes snapshots at the beginning of every
                  round. Because of the round synchronization, these
                  snapshots must be identical at every node. Thus
                  inconsistencies can be detected. Stabilization time
                  is fast, but a huge amount of space is needed."
}

%% Series, number and publisher were filled in but left with the OPT
%% prefix; they are activated here in the same form as the other
%% pro-WDAG97 entry in this file that carries them (number + ser-LNCS +
%% pub-SV).
@InProceedings{Aguilera:1997:HTF,
  author =       "Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg",
  title =        "Heartbeat: a timeout-free failure detector for
                  quiescent reliable communication",
  booktitle =    pro-WDAG97,
  number =       "1320",
  series =       ser-LNCS,
  pages =        "126--140",
  publisher =    pub-SV,
  month =        sep,
  year =         "1997",
  annote =       "The authors consider the problem of reliable
                  communication within quiescent algorithms,
                  i.e. algorithms that eventually stop sending
                  messages, in asynchronous systems with lossy
                  links. They solve the problem using a novel failure
                  detector called `heartbeat'. This failure detector
                  is a vector of size $n$ within each node, where $n$
                  is the number of neighbours the node has (one entry
                  per neighbour). The value of slot $i$ increments if
                  an alive signal (message) has been received from
                  neighbour $i$. It is shown that reliable
                  communication can be achieved in such settings using
                  heartbeat but it seems that the problems of timeouts
                  and synchrony are moved one level downwards. The
                  authors argue that this is okay since the failure
                  detector may be shared by other system modules and
                  there is no `terminating' version of failure
                  detectors anyway. The authors claim that heartbeat
                  is implementable and give evidence in which they use
                  the term ``periodically'' instead of
                  ``timeout''. The main novelty with heartbeat is that
                  it has an infinite range, i.e. it outputs infinite
                  values (in contrast to previous versions that output
                  finite lists of suspects). Apart from this last
                  point, this paper is a good
                  starting point for finding literature on failure
                  detection."
}




@TechReport{Aguilera:1997:QRC,
  author      = {Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg},
  title       = {Quiescent Reliable Communication and Quiescent
                 Consensus in Partitionable Networks},
  institution = {Cornell University, Computer Science Department},
  number      = {TR97-1632},
  pages       = {24},
  month       = jun,
  year        = {1997},
  abstract    = {We consider partitionable networks with process
                 crashes and lossy links, and focus on the problems of
                 reliable communication and consensus for such networks.
                 For both problems we seek algorithms that are
                 quiescent, i.e., algorithms that eventually stop
                 sending messages. We first tackle the problem of
                 reliable communication for partitionable networks by
                 extending the results of [ACT97a]. In particular, we
                 generalize the specification of the heartbeat failure
                 detector HB, show how to implement it, and show how to
                 use it to achieve quiescent reliable communication. We
                 then turn our attention to the problem of consensus for
                 partitionable networks. We first show that, even though
                 this problem can be solved using a natural extension of
                 <>S, such solutions are not quiescent --- in other
                 words, <>S alone is not sufficient to achieve quiescent
                 consensus in partitionable networks. We then solve this
                 problem using <>S and the quiescent reliable
                 communication primitives that we developed in the first
                 part of the paper. Our model of failure detectors for
                 partitionable networks, a natural extension of the
                 model in [CT96], is also a contribution of this
                 paper.},
  annote      = {See the Journal version \cite{Aguilera:1999:UHF}.}
}


@TechReport{Aguilera:1997:WFD,
  author      = {Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg},
  title       = {On the Weakest Failure Detector for Quiescent Reliable
                 Communication},
  institution = {Cornell University, Computer Science Department},
  number      = {TR97-1640},
  pages       = {16},
  month       = jul,
  year        = {1997},
  abstract    = {We consider the problem of achieving reliable
                 communication with quiescent algorithms (i.e.,
                 algorithms that eventually stop sending messages) in
                 asynchronous systems with process crashes and lossy
                 links, and show that, among failure detectors with
                 bounded output size, <>P is the weakest one that can be
                 used to solve this problem. Combined with a result in
                 [ACT97a], this shows that failure detectors that are
                 commonly used in practice, i.e., those that output
                 lists of suspects, are not always the best ones to
                 solve a problem.},
  annote      = {[to read]}
}


@Article{Alur:1997:TAA,
  author  = {Rajeev Alur and Hagit Attiya and Gadi Taubenfeld},
  title   = {Time-Adaptive Algorithms for Synchronization},
  journal = {SIAM Journal on Computing},
  volume  = {26},
  number  = {2},
  pages   = {539--556},
  month   = apr,
  year    = {1997},
  annote  = {Proves that time is insignificant to safety properties.
             Referenced in \cite{Merritt:1998:FSO}. Looks at consensus
             and mutual exclusion in shared memory environments that
             have an unknown upper bound on memory access times.}
}

@Misc{Arora:1997:OCC,
  author =       "Anish Arora and Mohamed G. Gouda",
  title =        "On the correctness criteria of load balancing programs",
  howpublished = "Internet: ftp://ftp.cis.ohio-state.edu/pub/anish/papers/load-balancing.ps.gz",
  month =        apr,
  year =         "1997",
  note =         "",
  annote =       "revised version of \cite{Arora:1995:ECC}, submitted to IEEE TPDS."
}

@InProceedings{Asokan:1997:OPF,
  author =       "N. Asokan and Matthias Schunter and Michael Waidner",
  title =        "Optimistic Protocols for Fair Exchange",
  pages =        "8--17",
  booktitle =    "4th {ACM} Conference on Computer and Communications
                 Security",
  address =      "Z{\"u}rich, Switzerland",
  year =         "1997",
  publisher =    pub-ACM,
  month =        apr,
  editor =       "Tsutomu Matsumoto",
  annote =       "[to get]"
}


@Article{Avizienis:1997:TSD,
  author  = {Algirdas Avizienis},
  title   = {Toward Systematic Design of Fault-Tolerant Systems},
  journal = j-IEEE-COMPUTER,
  year    = {1997},
  month   = apr,
  volume  = {30},
  number  = {4},
  pages   = {51--58},
  annote  = {[to read]}
}

@TechReport{Babaoglu:1997:PGM,
  author = 	 "{\"O}zalp Babao{\u{g}}lu and Renzo Davoli and Alberto
		  Montresor",
  title = 	 "Partitionable group membership: specification and
		  algorithms",
  institution =  "Department of Computer Science, University of
		  Bologna, Italy",
  year = 	 "1997",
  number = 	 "UBLCS-97-1",
  month = 	 jan,
  note = 	 "Revised May 1997.",
  annote = 	 "[to read]"
}

@InProceedings{Beauquier:1997:OFS,
  author    = {Joffroy Beauquier and Synn{\"o}ve Kekkonen-Moneta},
  title     = {On {FTSS}-Solvable Distributed Problems},
  booktitle = {Proceedings of the Sixteenth Annual {ACM} Symposium on
               Principles of Distributed Computing},
  address   = {Santa Barbara, California},
  pages     = {290},
  month     = "21--24~" # aug,
  year      = {1997},
  annote    = {Brief announcement at PODC, 1 page only. See also
               \cite{Beauquier:1996:MFH,Beauquier:1997:FTS,Kekkonen:1998:RFA}.}
}




@Article{Beauquier:1997:FTS,
  author = 	 {Joffroy Beauquier and Synn{\"o}ve Kekkonen-Moneta},
  title = 	 {Fault-tolerance and self-stabilization: impossibility
                  results and solutions using self-stabilizing failure
                  detectors},
  journal = 	 {International Journal of Systems Science},
  year = 	 {1997},
  volume = 	 {28},
  number = 	 {11},
  pages = 	 {1177--1187},
  annote = 	 {A rounding-up paper of previous work in fault-tolerance
     and self-stabilization started with \cite{Gopal:1993:USF} and
     \cite{Anagnostou:1993:TTP}. The authors show that the
     transformation of a fault-tolerant protocol into a fault-tolerant
     self-stabilizing (ftss) protocol (performed in synchronous
     environments in \cite{Gopal:1993:USF}) cannot be extended to
     asynchronous environments because it is impossible to distinguish
     a slow process from a crashed one. Then, they show that the
     impossibility result of \cite{Anagnostou:1993:TTP} (which also
     rests on the necessity to distinguish a crashed from a slow
     process) can be extended to a class of networks. These results can
     be circumvented by adding some synchrony to the model in the form
     of failure detectors (in the spirit of \cite{Chandra:1996:UFD}).
     The synchrony assumption here is called ``fair communication'',
     meaning that a correct process can receive only finitely many
     messages from any one correct neighbor before receiving a message
     from every other correct neighbor. (Processes are assumed to emit
     a message to every neighbor at every tick of their local clock.)
     This seems to be equivalent to a combination of stabilizing clock
     drift and stabilizing transmission delay. The authors give
     implementations for failure detectors based on this assumption
     for both cases, i.e., whether the bound is known or whether only
     the time until it holds is unknown (in the spirit of partial
     synchrony \cite{Dwork:1988:CPP}). The ideas herein are exposed
     more elaborately in Kekkonen-Moneta's thesis
     \cite{Kekkonen:1998:RFA}.}
}



@PhdThesis{Borcherding:1997:AEB,
  author = {Malte Borcherding},
  title  = {{Authentifikationsvoraussetzungen f\"ur effiziente
            byzantinische \"Ubereinstimmung}},
  school = {Universit\"at Karlsruhe, Fakult\"at f\"ur Informatik},
  note   = {Logos-Verlag, Berlin},
  year   = {1997},
  annote = {Stellt mehrere Zwischengrade der Authentifikation vor,
            die effizientere Agreement-Algorithmen ermoeglichen.
            Implizit Definition einer deutschen Terminologie fuer
            Uebereinstimmungsprobleme.}
}

@Article{Chen:1997:FRC,
  author  = {Biao Chen and Sanjay Kamat and Wei Zhao},
  title   = {Fault-Tolerant, Real-Time Communication in
             {FDDI}-Based Networks},
  journal = j-IEEE-COMPUTER,
  year    = {1997},
  month   = apr,
  volume  = {30},
  number  = {4},
  pages   = {83--90},
  annote  = {[to read]}
}



@InProceedings{Dolev:1997:FDO,
  author = 	 {Danny Dolev and Roy Friedman and Idit Keidar and 
                 Dahlia Malkhi},
  title = 	 {Failure detectors in omission failure environments},
  booktitle = 	 pro-podc97,
  OPTpages = 	 {186},
  year = 	 {1997},
  annote = 	 {This is a brief announcement (1 page) at PODC97. See 
                  also the technical report version \cite{Dolev:1996:FDO}
                  [to read]}
}

@Article{Dolev:1997:PIS,
  author  = {Shlomi Dolev},
  title   = {Possible and impossible self-stabilizing digital clock
             synchronization in general graphs},
  journal = {Journal of Real-Time Systems},
  year    = {1997},
  volume  = {12},
  number  = {1},
  pages   = {95--107},
  annote  = {This paper contains a good general survey of clock
             synchronization in shared-memory multiprocessor
             systems with a general communication graph.}
}

@Article{Dolev:1997:SRR,
  author  = {Shlomi Dolev},
  title   = {Self-Stabilizing Routing and Related Protocols},
  journal = {Journal of Parallel and Distributed Computing},
  year    = {1997},
  volume  = {42},
  number  = {2},
  pages   = {122--127},
  annote  = {[to read]}
}

@TechReport{Doudou:1997:MDC,
  author = 	 "Assia Doudou and Andr{\'e} Schiper",
  title = 	 "Muteness detectors for consensus with {Byzantine}
		  processes",
  institution =  "EPFL -- D{\'e}partement d'Informatique, Lausanne,
		  Switzerland",
  year = 	 "1997",
  number = 	 "TR-97/230",
  month = 	 oct,
  annote = 	 "The authors extend the notion of failure detectors
		  to the Byzantine failure model. Generally, a
		  Byzantine process can do four things: (1) ignore
		  another process, or (2) send garbled messages to
		  another process, or (3) send messages that seem
		  correct to another process but do not follow the
		  protocol, or (4) skip protocol messages. To combat
		  this type of faulty behavior the authors present a
		  muteness failure detector. A process i is mute to a
		  process j if there is a time after which i crashes,
		  or i stops sending messages to j, or i sends only
		  incorrectly signed messages or unsigned messages to
		  j. Based on this definition, the muteness detector
		  is defined in terms of the traditional eventual weak
		  accuracy property and mute completeness, stating
		  that eventually every process i which is mute to
		  process j is permanently suspected by j. The
		  muteness detector guards against (1) and (2). The
		  behaviors (3) and (4) can be detected and corrected
		  by usual methods to solve Byzantine agreement
		  \cite{Lamport:1982:BGP}. Channels must be FIFO to be
		  able to detect missing messages (previous solutions
		  required causal message delivery
		  \cite{Malkhi:1997:UID}). The authors adapt the
		  consensus specification to Byzantine environments
		  (resulting in a definition of the vector consensus
		  problem) and give an algorithm that uses the
		  muteness detector to achieve consensus in a
		  Byzantine environment. The algorithm is based on the
		  early consensus algorithm by Schiper
		  \cite{Schiper:1997:ECA}. There's a mentioning of the
		  echo broadcast technique for solving the Byzantine
		  Generals Problem \cite{Lamport:1982:BGP} with signed
		  messages." 
}

@InProceedings{Fetzer:1997:FAA,
  author    = {Christof Fetzer and Flaviu Cristian},
  title     = {Fail-Awareness: An Approach to Construct Fail-Safe
               Applications},
  booktitle = {Proceedings of The Twenty-Seventh Annual International
               Symposium on Fault-Tolerant Computing ({FTCS}'97)},
  publisher = {IEEE},
  pages     = {282--291},
  month     = jun,
  year      = {1997},
  ISBN      = {0-8186-7831-3},
  annote    = {The authors introduce the notion of fail-awareness
               as an approach to construct fail-safe
               applications. Fail-awareness is based on the idea
               that the underlying system is timed asynchronous,
               i.e., it is synchronous with a bound on timeliness
               and failure rate most of the time, and asynchronous
               in special cases. If such asynchronous phases can be
               detected, the affected parts of the system must
               switch into an exception mode that signals this fact
               to clients. In this way, the system may degenerate
               in a safe way. If synchronous performance is
               re-established, services may re-join and catch up
               again. Fail-awareness can be used to transform
               synchronous service specifications so that they
               become implementable in timed asynchronous
               systems. The detection of timeliness properties is
               based on synchronized clocks. A hierarchy of
               fail-aware services is presented. Overall, this
               paper is very dense and much of the details of
               protocols are left to other references which should
               be read to be convincing. It is another example that
               detection is a prerequisite of fail-safe or masking
               fault tolerance.}
}

@inproceedings{Fetzer:1997:FAD,
   author    = {Christof Fetzer and Flaviu Cristian},
   title     = {A Fail-Aware Datagram Service},
   booktitle = {Proceedings of the 2nd Annual Workshop on Fault-Tolerant Parallel and Distributed Systems},
   year      = {1997},
   month     = apr,
   address   = {Geneva, Switzerland},
   note      = {\url{http://www-cse.ucsd.edu/users/cfetzer/FADS/fads.html}},
   annote = "[to read]"
}

@InProceedings{Fetzer:1997:TAA,
  author =       "Christof Fetzer and Shivakant Mishra and Flaviu
                 Cristian",
  title =        "The Timewheel Asynchronous Atomic Broadcast Protocol",
  booktitle =    "International Conference on Parallel and Distributed
                 Processing Techniques and Applications (PDPTA'97)",
  publisher =    "IEEE",
  address =      "Las Vegas, Nevada, USA",
  month =        jun,
  year =         "1997",
  abstract =     "http://www.cps.udayton.edu/\~{}pan/pdpta.",
  annote = "Presents a collection of several total order broadcast protocols.
    Ordering can be unordered, total or time order, atomicity can be 
    weak, strong or strict. Focus is on performance issues, unlike
    \cite{Hadzilacos:1994:MAF}. I think there's also something called
    timewheel group membership."
}

@InProceedings{Franklin:1997:FES,
  author    = {Matthew K. Franklin and Michael K. Reiter},
  title     = {Fair Exchange with a Semi-Trusted Third Party},
  booktitle = {4th {ACM} Conference on Computer and Communications
               Security},
  editor    = {Tsutomu Matsumoto},
  publisher = pub-ACM,
  address   = {Z{\"u}rich, Switzerland},
  pages     = {1--5},
  month     = apr,
  year      = {1997},
  annote    = {active exchange? [to get]}
}

@Misc{Gaertner:1997:FRD,
  author =       {Felix G{\"a}rtner},
  title =        "Fehlertolerante {Replikation} von {Diensten} mit
                  schwacher {Konsistenz} mittels selbststabilisierender
                  verteilter {Algorithmen}",
  howpublished = {Diplomarbeit DA-BS-1997-06 am Fachgebiet
                  Betriebssysteme des Fachbereichs
                  Informatik, Technische Universit{\"a}t Darmstadt},
  year =         "1997",
  month =        dec,
  note =         {Internet:
                  \texttt{http://www.informatik.tu-darmstadt.de/\-$\tilde{}$felix/diploma}}
}





@InProceedings{Garg:1997:OCD,
  author = 	 "Vijay K. Garg",
  title = 	 "Observation and control for debugging distributed
		  computations",
  pages = 	 "1--12",
  booktitle = "3rd Int. Workshop on Automated Debugging (AADEBUG
		  97)",
  year = 	 "1997",
  address = 	 "Link{\"o}ping, Sweden",
  url = "http://www.ep.liu.se/ea/cis/1997/009/",
  month = 	 may,
  OPTnote = 	 "keynote presentation",
  annote = 	 "As one of the ``big men'' in theory of distributed
		  systems, Garg presents here an overview over the
		  topics of observation and control of distributed
		  computations. Objective of control is to either
		  maintain an invariant on a global state or to ensure
		  a proper order of events. Observation is used to
		  monitor system actions. Three restrictions impose
		  problems on observation: (1) the lack of shared
		  clock can be alleviated by substituting causality
		  for real time and detecting predicates transformed
		  using `possibly' and `definitely'. Possibly true
		  predicates are useful for detecting bad conditions,
		  whereas definitely true predicates are useful to
		  verify the occurrence of good predicates. (2) The
		  lack of shared memory can be alleviated by using the
		  notion of monotonicity. A predicate is monotone with
		  respect to a variable if monotonic changing of that
		  variable doesn't change the truth of the
		  predicate. This allows us to restrict our attention
		  to state intervals rather than states. This allows
		  us to reduce the number of events that must be
		  inspected drastically. (3) Combinatorial explosion
		  is combatted by the use of linear predicates. In
		  general, detecting possibly is NP-complete. However,
		  linear predicates can be detected efficiently: a
		  predicate is linear if its value `false' can be
		  detected ``locally'' (i.e., it contains a forbidden
		  state of a process or channel). So conjunctions of
		  local predicates can be efficiently detected. The
		  paper briefly surveys some possibly detection
		  algorithms and states some open problems. Then it
		  turns to the issue of control and discusses
		  different modes (on-line, off-line) and methods
		  (delaying/reordering events). Finally, a fictitious
		  (but implementable) distributed debugger is
		  described. Overall, this is a very fluent
		  introductory paper to the issues of observation (and
		  control) in distributed systems."
}

@InProceedings{Guerraoui:1997:CBM,
  author =       "Rachid Guerraoui and Andr{\'e} Schiper",
  title =        "Consensus: the big misunderstanding",
  OPTpages =     "???",
  booktitle = "Proceedings of the 6th Workshop on Future Trends of
                  Distributed Computing Systems (FTDCS-6)",
  year =         "1997",
  month =        oct,
  annote =       "This paper tries to clarify six popular
                  misunderstandings about the consensus problem that
                  prevent consensus from being considered fundamental
                  both in theory and in practice. The
                  misunderstandings are: (1) Consensus is for
                  theoreticians only, (2) Time-outs are enough, (3)
                  There is no life after FLP, (4) The failure detector
                  model is unrealistic, (5) Time-free means
                  inefficient, (6) Asynchronous algorithms cannot be
                  used for time critical applications. A very concise
                  and well readable paper that does good summarizing
                  work and is a good source for arguments."
}

@InProceedings{Guerraoui:1997:GAM,
  author =       "Rachid Guerraoui and Andr{\'e} Schiper",
  title =        "Genuine atomic multicast",
  number =       "1320",
  series =       ser-LNCS,
  pages =        "141--154",
  booktitle = pro-WDAG97,
  year =      "1997",
  publisher = pub-SV,
  month =        sep,
  annote =       "The authors define genuine atomic multicast to be an
                  atomic multicast with a specific minimality property,
                  i.e. that only the processes in the multicast group
                  ``act'' and others remain ``quiet'' (this is as
                  opposed to atomic multicast faked by an underlying
                  atomic broadcast). They show that genuine atomic
                  multicast is strictly stronger than atomic broadcast
                  in that it needs a perfect failure detector (thus
                  stricter synchrony assumptions) to be solvable in
                  asynchronous systems. They argue that it is exactly
                  the minimality requirement that makes the problem
                  unsolvable with unreliable failure detection."
}




@Article{Guerraoui:1997:SBR,
  author    = {Rachid Guerraoui and Andr{\'e} Schiper},
  title     = {Software-Based Replication for Fault Tolerance},
  journal   = j-IEEE-COMPUTER,
  year      = {1997},
  month     = apr,
  volume    = {30},
  number    = {4},
  pages     = {68--74},
  keywords  = {correctness criterion; cost; fault tolerance; group
               communication; linearizability; message passing;
               off-the-shelf hardware; replicated servers; replicated
               service implementation techniques; reviews; software
               fault tolerance; software-based replication;
               specialized hardware; survey},
  treatment = {G General Review},
  annote    = {This is a general survey over software based
               replication techniques to achieve fault tolerance
               with a strong emphasis on the relations to group
               communication and consensus. Issues of
               view-synchronous and totally ordered communication
               and their relation to consensus using unreliable
               failure detectors are discussed. In general, this is
               a lightweight overview paper that doesn't upset your
               tummy.}
}

@Article{Hsueh:1997:FIT,
  author =       "Mei-Chen Hsueh and Timothy K. Tsai and Ravishankar K.
                 Iyer",
  title =        "Fault Injection Techniques and Tools",
  journal =      j-IEEE-COMPUTER,
  volume =       "30",
  number =       "4",
  pages =        "75--82",
  month =        apr,
  year =         "1997",
  annote =       "A survey over current fault injection techniques and
		  tools. A good quote. Interesting are the different
		  types of software fault injection techniques and
		  their relations to the program transformational
		  approach in describing failure models
		  \cite{Gaertner:1998:SFT}. They have the same
		  underlying idea but a different purpose: one is
		  experimental (and dynamic) and the other is
		  theoretical (and static). A German reference is
                  \cite{Echtle:1998:FMB}."
}

@TechReport{Hurfin:1997:CAS,
  author = 	 "Michel Hurfin and Achour Most{\'e}faoui and Michel Raynal",
  title = 	 "Consensus in asynchronous systems where processes
		  can crash and recover",
  institution =  "Institut de Recherche en Informatique et Syst{\`e}mes
		  Al{\'e}atoires (IRISA)",
  year = 	 1997,
  number =	 1144,
  address = 	 "Campus de Beaulieu, 35042 Rennes Cedex, France",
  month = 	 nov,
  annote = 	 "[to read] surveyed in \cite{Aguilera:1998:FDC}.
                 Published at SRDS'98 \cite{Hurfin:1998:CAS}."
}

@InProceedings{Kakugawa:1997:DSD,
  author =       "Hirotsugu Kakugawa and Masaaki Mizuno and Mikhail
                  Nesterenko",
  title =        "Development of self-stabilizing distributed
                  algorithms using transformation: case studies",
  pages =        "16--30",
  booktitle =    pro-wss97,
  year =         1997,
  annote =       "The authors evaluate their transformation algorithm
                  \cite{Mizuno:1996:TBT} from the serial model to the
                  distributed model on several examples including
                  lock-based mutual exclusion and leader
                  election. They conclude that transformed algorithms
                  have a larger message complexity (which depends on
                  the choice of timeout values) but this is paid off
                  by sparing the hassle of developing, debugging and
                  verifying algorithms for the distributed model from
                  scratch. Simulation results suggest that both types
                  of algorithms have the same asymptotic message
                  complexity."
}

@TechReport{Kreitz:1997:FRC,
  author      = {Christoph Kreitz},
  title       = {Formal reasoning about communication systems {I}:
                 {Embedding} {ML} into type theory},
  institution = {Cornell University},
  address     = {Ithaca},
  number      = {TR97-1637},
  month       = jul,
  year        = {1997},
  annote      = {Abstract: We present a semantically correct
                 embedding of a subset of the Ocaml programming
                 language into the type theory of NuPRL. The subset
                 is that needed to build the Ensemble group
                 communication system. We describe the essential
                 methodologies for representing language constructs
                 by type-theoretical expressions. Tactics
                 representing derived inference rules and a
                 programming logic for these constructs will be
                 discussed as well as algorithms for translating an
                 Ocaml-program into NuPRL-objects and vice versa. The
                 formal representations and the translation
                 algorithms will serve as the foundation for the
                 development of automated reasoning tools for the
                 verification and optimization of a group
                 communication systems. [(noch) nicht ausgedruckt]}
}




@Article{Kuhn:1997:SFP,
  author  = {D. Richard Kuhn},
  title   = {Sources of Failure in the Public Switched Telephone
             Network},
  journal = j-IEEE-COMPUTER,
  year    = {1997},
  month   = apr,
  volume  = {30},
  number  = {4},
  pages   = {31--36},
  annote  = {[to read]}
}

@InProceedings{Kulkarni:1997:CDM,
  author =       "Sandeep S. Kulkarni and Anish Arora",
  title =        "Compositional design of multitolerant repetitive
                  {Byzantine} agreement",
  booktitle = "Proceedings of the 17th International Conference on
                  the Foundations of Software Technology and
                  Theoretical Computer Science, Kharagpur, India",
  year =         "1997",
  annote =       "Byzantine agreement is taken as an application
                  example of building fault tolerant programs using the
                  detectors and correctors methodology of Arora and
                  Kulkarni \cite{Arora:1998:CDM}."
}



@InProceedings{Malkhi:1997:UID,
  author    = {Dahlia Malkhi and Michael Reiter},
  title     = {Unreliable Intrusion Detection in Distributed Computations},
  booktitle = {Proceedings of the 10th Computer Security
               Foundations Workshop (CSFW97)},
  address   = {Rockport, MA},
  pages     = {116--124},
  month     = jun,
  year      = {1997},
  annote    = {[to read]}
}

%% The particle "van" is written in lower case so that BibTeX parses it
%% as the von part of the surname rather than as a further given name.
@Book{Menezes:1997:HAC,
  author =       "Alfred J. Menezes and Paul C. van Oorschot and Scott
                  A. Vanstone",
  title =        "Handbook of Applied Cryptography",
  publisher =    "CRC Press",
  address =      "Boca Raton, FL",
  year =         "1997",
  annote =       "Brilliant and beautiful book on all aspects of
                  cryptography with a strong practical perspective
                  without diving into source code (like Schneier)."
}



@Article{Nelles:1997:NNI,
  author  = {O. Nelles and S. Ernst and R. Isermann},
  title   = {{Neuronale Netze zur Identifikation nichtlinearer
             dynamischer Systeme: ein \"Uberblick}},
  journal = {Automatisierungstechnik},
  volume  = {45},
  number  = {6},
  pages   = {251--262},
  year    = {1997},
  annote  = {[Angabe von Armin]}
}

@TechReport{Oliveira:1997:CCR,
  author      = {Rui Oliveira and Rachid Guerraoui and {Andr\'e} Schiper},
  title       = {Consensus in the crash-recover model},
  institution = {EPFL -- D{\'e}partement d'Informatique, Lausanne,
                 Switzerland},
  number      = {TR-97/239},
  month       = aug,
  year        = {1997},
  annote      = {[to read] surveyed in \cite{Aguilera:1998:FDC}.}
}

@InProceedings{Pagnia:1997:TMP,
  author     = {Henning Pagnia and Ralph Jansen},
  title      = {Towards Multiple-Payment Schemes for Digital Money},
  booktitle  = {Financial Cryptography: First International
                Conference, {FC}~'97},
  editor     = {Rafael Hirschfeld},
  series     = ser-LNCS,
  volume     = {1318},
  pages      = {203--215},
  publisher  = pub-SV,
  address    = {Anguilla, British West Indies},
  month      = "24--28~" # feb,
  year       = {1997},
  ISBN       = {3-540-63594-7},
  references = {{CRYPTO::Brands1993} {CRYPTO::chaumFN1988}
                {EUROCRYPT::ChaumP1992}
                {CRYPTO::Ferguson1993}
                {EUROCRYPT::Jakobsson1995}},
  annote     = {[to read] reinvented in \cite{Riordan:1998:CEP}.}
}

@InProceedings{Prisco:1997:RPA,
  author    = {De Prisco, Roberto and Lampson, Butler and Lynch, Nancy},
  title     = {Revisiting the {Paxos} Algorithm},
  booktitle = pro-wdag97,
  pages     = {111--125},
  year      = {1997},
  annote    = {[to read]}
}



@Misc{Rock:1997:TSC,
  author       = {Georg Rock and Werner Stephan and Andreas Wolpers},
  title        = {Tool support for the compositional development
                  of distributed systems},
  howpublished = {\url{http://www.dfki.uni-sb.de/vse/papers/rsw97.ps.gz}},
  month        = may,
  year         = {1997},
  annote       = {[to read] published elsewhere?}
}

@InProceedings{Rushby:1997:SFV,
  author    = {John Rushby},
  title     = {Systematic Formal Verification for Fault-Tolerant
               Time-Triggered Algorithms},
  booktitle = {Dependable Computing for Critical Applications---6},
  series    = {Dependable Computing and Fault Tolerant Systems},
  volume    = 11,
  year      = 1997,
  editor    = {Mario Dal Cin and Catherine Meadows and William H. Sanders},
  publisher = {IEEE Computer Society},
  address   = {Garmisch-Partenkirchen, Germany},
  month     = mar,
  pages     = {203--222},
  annote    = {Rushby argues for the separation of algorithm
               functionality and timeliness properties. Proofs for
               time-critical modules can be quite cumbersome if
               they are tried as is, but they can become much
               simpler if the abstract functionality is proven
               correct and they are then embedded into a real-time
               environment in a safe way by a once-and-for-all
               proven methodology (an idea also proposed by Le Lann
               \cite{LeLann:1995:ORN}). Rushby presents such a
               transformation for (synchronous) round based
               algorithms: such an algorithm can be mechanically
               transformed into a time-triggered implementation
               with tight real-time bounds. The case
               is made by transforming the famous oral message BGP
               protocol \cite{Lamport:1982:BGP} into a
               time-triggered version by hand and using the PVS
               automated proof system.}
}



@Article{Schiper:1997:ECA,
  author  = {{Andr\'e} Schiper},
  title   = {Early consensus in an asynchronous system with a
             weak failure detector},
  journal = j-DC,
  year    = {1997},
  volume  = {10},
  number  = {3},
  pages   = {149--157},
  annote  = {The author presents a new algorithm for consensus in
             asynchronous systems which is an improvement over
             the original algorithm by Chandra and Toueg
             \cite{Chandra:1996:UFD}. Both use an unreliable
             eventually strong failure detector. The new early
             consensus algorithm uses the rotating coordinator
             paradigm and proceeds in asynchronous rounds. At the
             beginning of a round, the coordinator sends its
             estimate to all and tries to impose this value on
             the rest. A process receiving this estimate reissues
             it to all. As soon as a process receives this
             estimate from a majority of processes, it decides on
             that estimate. The algorithm ensures that once a
             majority of processes have adopted the same
             estimate, this value is locked and doesn't change
             anymore. So once a process decides, all other
             processes that decide do not decide differently. The
             failure detector ensures the liveness of the
             protocol. In comparison to the original
             Chandra/Toueg algorithm (CT) early consensus uses
             $n(n-1)$ messages to reach a decision in
             point-to-point networks while CT uses $3(n-1)$
             messages. However, the decision value must be sent
             to all (to cater for failure cases), and so both
             algorithms need an additional $n(n-1)$ messages for
             the total execution. Both therefore have $O(n^2)$
             message complexity. However, early consensus has a
             lower latency degree. The latency degree is defined
             to be the largest timestamp of logical time, where
             ``messages tick''. This is a more precise measure
             for the number of rounds that an algorithm needs to
             execute. Early consensus has a latency degree of 2,
             whereas CT has a latency degree of 4 (easily
             optimized to 3). The efficiency stems from improving
             parallelism by adding messages in the second part of
             a round. So early consensus is an improvement in
             both simplicity and efficiency. See also the erratum
             \cite{Schiper:1997:EEC}.}
}

@Article{Schiper:1997:EEC,
  author  = {{Andr\'e} Schiper},
  title   = {Erratum: Early consensus in an asynchronous system
             with a weak failure detector},
  journal = j-DC,
  volume  = {10},
  pages   = {198},
  year    = {1997},
  annote  = {corrections of lines 34 and 46 in Figure 1 of page
             153.}
}



@InProceedings{Setz:1997:DIA,
  author    = {Thomas Setz},
  title     = {Design, implementation and performance of a fault tolerant
               tuple space machine},
  booktitle = {Proceedings of the International Conference on Parallel
               and Distributed Systems (ICPADS'97)},
  pages     = {10--13},
  publisher = pub-IEEE-CSP,
  address   = {Seoul, Korea},
  month     = dec,
  year      = {1997},
  annote    = {Conference version of \cite{Setz:1997:DIP}.}
}

@TechReport{Setz:1997:DIP,
  author      = {Thomas Setz},
  title       = {Design, Implementation and Performance of a Mutex-Token based
                 Fault-Tolerant Tuple Space Machine},
  institution = {Sonderforschungsbereich 124, Universit{\"a}t des Saarlandes},
  year        = {1997},
  number      = {SFB 124 - 09/1997, TP D5},
  address     = {Fachbereich Informatik, 66041 {Saarbr\"ucken}, Germany},
  month       = jul,
  url         = "\url{http://cdc-server.cdc.informatik.tu-darmstadt.de/home/LiPS/LiPS/documentation/objects/doc/html/papers/FTTM/FTTM_technical_report_sb/FTTM_technical_report_sb.html}",
  annote      = {Introduction to LiPS and description of the membership
                 protocol used to make the tuple space engine fault
                 tolerant. Appeared at ICPADS'97 \cite{Setz:1997:DIA}.}
}

@InProceedings{Sims:1997:RMS,
  author    = {J. T. Sims},
  title     = {Redundancy Management Software Services for {Seawolf}
               Ship Control System},
  pages     = {390--394},
  booktitle = pro-ftcs97,
  ISBN      = {0-8186-7831-3},
  month     = jun,
  publisher = {IEEE},
  address   = {Washington - Brussels - Tokyo},
  year      = {1997},
  annote    = {Seawolf is a ``new'' class of US Navy attack
               submarines. Its computer system is quadruply redundant with four
               independent fault containment regions which use Byzantine tolerant
               voting to achieve consensus on output. The voting process is
               implemented in simple hardware and also is quadruply redundant. The
               system is masking tolerant against up to two non-simultaneous
               permanent faults before it is fail-safed. Faulty components can be
               exchanged online. The processors operate in lock-step synched
               mode. Fault detection and isolation methods and reconfiguration
               facilities are also described.}
}

@InProceedings{Stoller:1997:DGP,
  author       = {Scott D. Stoller},
  title        = {Detecting Global Predicates in Distributed Systems
                  with Clocks},
  booktitle    = pro-wdag97,
  OPTeditor    = {Marios Mavronicolas and Philippas Tsigas},
  OPTnumber    = {1320},
  OPTseries    = ser-LNCS,
  year         = {1997},
  OPTpublisher = pub-SV,
  month        = sep,
  pages        = {185--199},
  annote       = {Stoller proposes a generalization of predicate
                  detection in distributed computations based on
                  lattice theory: he shows that any partial order with
                  certain properties can be used to reason about
                  consistent global states. From such an ordering
                  follow generic definitions of the modalities
                  `possibly' and `definitely' introduced by Cooper and
                  Marzullo \cite{Cooper:1991:CDG}. The author
                  instantiates his generic definitions with two orders
                  which are based on the values of synchronized
                  clocks. The first is called `definitely occurred
                  before' and the second `possibly occurred before'; he
                  also presents adaptions of known algorithms to
                  detect them. Such algorithms can be optimized if the
                  predicate has a certain (conjunctive) form
                  (analogous to local detectability in constraint
                  satisfaction \cite{Arora:1996:CSB}). A combination
                  of possibly and definitely called `instantaneously'
                  (or `properly') is introduced and
                  discussed. Application of the results is seen in
                  online monitoring and debugging of distributed
                  applications, not in fault tolerance, although the
                  example of debugging database coherence protocols is
                  near to detecting illegal states.}
}





@TechReport{Weber:1997:DAW,
  author      = {Michael Weber and Rolf Walter and Hagen {V\"olzer}
                 and Tobias Vesper and Wolfgang Reisig and Sibylle
                 Peuker and Ekkart Kindler and {J\"orn} Freiheit and
                 {J\"org} Desel},
  title       = {{DAWN}: {Petrinetzmodelle} {zur} {Verifikation}
                 {Verteilter} {Algorithmen}},
  institution = {Humboldt-{Universit\"at} Berlin, Institut {f\"ur}
                 Informatik},
  type        = {Informatik-Bericht},
  number      = {88},
  address     = {Unter den Linden 6, D-10099 Berlin},
  month       = dec,
  year        = {1997},
  OPTannote   = {[to read]}
}

@Misc{Wilhelm:1997:CPO,
  author = {Uwe G. Wilhelm},
  title  = {Cryptographically Protected Objects},
  month  = may,
  year   = 1997,
  note   = {A French version appeared in the Proceedings
            of RenPar'9, Lausanne, CH.
            \url{http://lsewww.epfl.ch/~wilhelm/CryPO.html}},
  annote = {presents the idea of a tamper proof computing environment.}
}


@TechReport{Aguilera:1998:FDCTR,
  author      = {Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg},
  title       = {Failure Detection and Consensus in the Crash-Recovery
                 Model},
  institution = {Cornell University, Computer Science Department},
  number      = {TR98-1676},
  month       = apr,
  year        = {1998},
  abstract    = {We study the problems of failure detection and
                 consensus in asynchronous systems in which processes
                 may crash and recover, and links may lose messages. We
                 first propose new failure detectors that are
                 particularly suitable to the crash-recovery model. We
                 next determine under what conditions stable storage is
                 necessary to solve consensus in this model. Using the
                 new failure detectors, we give two consensus algorithms
                 that match these conditions: one requires stable
                 storage and the other does not. Both algorithms
                 tolerate link failures and are particularly efficient
                 in the runs that are most likely in practice --- those
                 with no failures or failure detector mistakes. In such
                 runs, consensus is achieved within 3d time and with 4n
                 messages, where d is the maximum message delay and n is
                 the number of processes in the system.},
  annote      = {The authors extend the work on asynchronous
                 consensus using unreliable failure detectors to a
                 more severe fault model than previous research has
                 considered: now nodes may crash and recover, and
                 links may lose messages. The authors first derive
                 specifications for failure detectors which are
                 better suited for this new fault model than those
                 proposed in earlier papers by other authors. They do
                 this by showing that the usual strong completeness
                 property for the crash-recovery model (stating that
                 eventually every bad process is permanently
                 suspected by all good processes) is too strong
                 because these detectors have to make predictions on
                 the future behavior of other processes. They propose
                 a new form of failure detectors with an infinite
                 output domain and with different properties that
                 circumvents the problems of the previous
                 specification. Next, the authors identify, under
                 what conditions stable storage is necessary to solve
                 consensus in such an environment.  They show that as
                 long as the number of always-up processes is less or
                 equal to the number of eventually-down processes
                 consensus cannot be reached even if links do not
                 lose messages and an eventually perfect failure
                 detector can be used. Saving the proposed/decision
                 values on stable storage does not help if there are
                 additionally more than two eventually-down
                 processes. However, if there are more always-up
                 processes than bad processes consensus can be solved
                 even without stable storage (two increasingly
                 efficient algorithms are given). With stable storage
                 consensus is solvable if there is a majority of good
                 processes in the system (an algorithm is given).
                 So, as long as one can guarantee that more processes
                 never crash than those processes that are unstable
                 or will eventually remain down, stable storage is
                 not needed. If all processes may crash at least
                 once, stable storage and a majority of good
                 processes is needed to solve consensus. All results
                 hold for fair lossy channels.}
}




@InProceedings{Aguilera:1998:FDC,
  author    = {Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg},
  title     = {Failure Detection and Consensus in the Crash-Recovery Model},
  booktitle = {Proceedings of the 12th International Symposium on
               Distributed Computing (DISC)},
  series    = ser-LNCS,
  volume    = {1499},
  pages     = {231--245},
  publisher = pub-SV,
  address   = {Andros, Greece},
  month     = sep,
  year      = {1998},
  annote    = {Described in \cite{Aguilera:1998:FDCTR}, this is a more
               citeable reference. Published in DC in 2000
               \cite{Aguilera:2000:FDC}.}
}





@Article{Akguel:1998:ICS,
  author  = {Tayfun {Akg\"ul}},
  title   = {International Conference on Self-Similar Systems (Cartoon)},
  journal = {IEEE -- The Institute},
  pages   = 10,
  month   = sep,
  year    = {1998},
  annote  = {Shows some linguistic resemblance to WSS.}
}



@Article{Akguel:1998:TZT,
  author  = {Tayfun {Akg\"ul}},
  title   = {Teaching the {Z}-Transform (Cartoon)},
  journal = {IEEE -- The Institute},
  year    = {1998},
  pages   = {12},
  month   = nov,
  annote  = {Shows a professor talking to a ZZZZ-sleeping audience.}
}

@InProceedings{Almeida:1998:ULG,
  author    = "Carlos Almeida and Paulo Ver\'{\i}ssimo",
  title     = "Using light-weight groups to handle timing failures in
               {\em quasi-synchronous} systems",
  booktitle = "Proceedings of the 19th IEEE Real-Time Systems Symposium",
  address   = "Madrid, Spain",
  month     = dec,
  year      = 1998,
  annote    = {covers part of the work described in
               \cite{Almeida:1998:QSA}.}
}

@TechReport{Almeida:1998:QSA,
  author      = {Carlos Almeida and Paulo {Ver{\'\i}ssimo} and
                 {Ant\'{o}nio} Casimiro},
  title       = {The quasi-synchronous approach to fault-tolerant and
                 real-time communication and processing},
  institution = {Instituto Superior T\'{e}cnico},
  year        = 1998,
  number      = {CTI RT-98-04},
  address     = {Lisboa, Portugal},
  month       = jul,
  annote      = {The authors propose a new system model to use for
                 large-scale fault-tolerant distributed systems, the
                 quasi-synchronous approach. The authors augment the
                 asynchronous model by adding a timing failure
                 detector to the system. A timing failure detector
                 can perfectly detect the non-timeliness of certain
                 events within a fixed period of time. With such a
                 failure detector it is possible to build reliable
                 systems in asynchronous environments because it is
                 essentially a perfect failure detector as described
                 by Chandra and Toueg \cite{Chandra:1996:UFD}. The
                 authors argue that such a failure detector can be
                 implemented over modern ``synchronous'' network
                 communications like ATM or GSM. Thus, only part of
                 the system (control channels vs. payload channels)
                 need be synchronous, easing the burden of practical
                 implementations. The authors give excellent reviews
                 of the current work in this area and show several
                 ways how the timeliness properties of
                 quasi-synchronous applications can be increased: (1)
                 by an early delivery causal atomic broadcast, (2) by
                 dynamically adjusting the QoS (and thus timeliness
                 deadlines) and (3) by active replication to limit
                 response times of servers. Overall, this is an
                 excellent paper which is also suited as an
                 introduction to the area (after reading
                 \cite{Chandra:1996:UFD}). Previous ideas appeared in other
                 form in \cite{Almeida:1996:TFD,Verissimo:1995:QSS}.}
}




@InProceedings{Arndt:1998:DLD,
  author       = "Olaf Arndt and Bernd Freisleben and Thilo Kielmann and
                  Frank Thilo",
  title        = "Dynamic load distribution with the {WINNER} system",
  booktitle    = "Proceedings of the Workshop ``Anwendungsbezogene
                  Lastverteilung'' (ALV'98)",
  pages        = "77--88",
  organization = {Technische Universit\"at M\"unchen},
  address      = {M\"unchen, Germany},
  year         = "1998",
  annote       = ""
}

@Article{Arora:1998:CDM,
  author  = {Anish Arora and Sandeep S. Kulkarni},
  title   = {Component based design of multitolerant systems},
  journal = j-IEEE-TRANS-SOFTW-ENG,
  year    = {1998},
  volume  = {24},
  number  = {1},
  pages   = {63--78},
  month   = jan,
  annote  = {Refinement of Arora's theory of closure and
             convergence \cite{Arora:1993:CCF}: the ability to
             tolerate certain kinds of faults is added to a
             system in a stepwise manner by adding detectors,
             that can detect invalidation of safety, and
             correctors, that re-establish liveness. By adding
             these components, care must be taken, that they do
             not interfere with each other. The application
             example developed in the paper is a multitolerant
             token ring protocol. The model used is the serial
             model. The difficulties of extending it to message
             passing models are not discussed.}
}



@InProceedings{Arora:1998:DCT,
  author    = {Anish Arora and Sandeep S. Kulkarni},
  title     = {Detectors and Correctors: A theory of
               fault-tolerance components},
  booktitle = pro-icdcs98,
  month     = may,
  year      = {1998},
  annote    = {A compact presentation and discussion of
               \cite{Arora:1998:CDM}.}
}

@Article{Arora:1998:DMF,
  author  = {Anish Arora and Sandeep S. Kulkarni},
  title   = {Designing masking fault tolerance via nonmasking
             fault tolerance},
  journal = j-IEEE-TRANS-SOFTW-ENG,
  year    = {1998},
  volume  = {24},
  number  = {6},
  pages   = {435--450},
  month   = jun,
  annote  = {A paper in the line of Arora's theory of correctors
             and detectors \cite{Arora:1998:CDM}. A fault
             intolerant program is transformed into a non-masking fault
             tolerant program by adding correctors and then
             transformed into a masking fault tolerant program by
             adding detectors. Detectors inhibit normal program
             actions when invalidation of the safety predicate is
             observed. Thus the program only takes ``safe''
             steps. Application examples include Byzantine
             agreement, reliable data transfer, mutual exclusion.}
}


@InProceedings{Arora:1998:SFC,
  author    = {Anish Arora and Paul C. Attie and E. Allen Emerson},
  title     = {Synthesis of fault-tolerant concurrent programs},
  booktitle = pro-podc98,
  pages     = {173--182},
  year      = {1998},
  annote    = {Based on a synthesis method for concurrent programs
               by Emerson and Clarke, this paper extends the
               possibilities to synthesize fault-tolerant programs
               that can tolerate a certain fault class. Faults are
               modelled as state transitions with a possibly
               extended state space, and recovery transitions are
               used to tolerate these faults. The method is based
               on temporal logic specifications. As examples,
               solutions to mutual exclusion and barrier
               synchronization are synthesized.}
}



@InProceedings{Asokan:1998:APO,
  author    = "N. Asokan and Victor Shoup and Michael Waidner",
  title     = "Asynchronous protocols for optimistic fair exchange",
  booktitle = "Proceedings of the IEEE Symposium on Research in
               Security and Privacy",
  pages     = "86--99",
  month     = may,
  year      = "1998",
  url       = "http://www.zurich.ibm.com/Technology/Security/publications/1998/ASW98.ps.gz",
  note      = "Printed version contains some errors. Errata sheet is
               distributed together with the electronic version.",
  annote    = "[to read]"
}




@InProceedings{Asokan:1998:OFE,
  author    = {N. Asokan and Victor Shoup and Michael Waidner},
  title     = {Optimistic Fair Exchange of Digital Signatures},
  booktitle = {EuroCrypt 98},
  editor    = {Kaisa Nyberg},
  series    = ser-LNCS,
  volume    = {1403},
  pages     = {591--606},
  publisher = pub-SV,
  year      = {1998},
  note      = {A longer version is available as Technical Report
               RZ 2973 (\#93019), IBM Research, November 1997 at
               http://www.zurich.ibm.com/Technology/Security/publications/1997/ASW97b.ps.gz},
  annote    = {[to read]}
}



@Misc{Autexier:1998:VSE,
  author       = {Serge Autexier and Dieter Hutter and Bruno Langenstein
                  and Heiko Mantel and Georg Rock and Axel Schairer and
                  Werner Stephan and Roland Vogt and Andreas Wolpers},
  title        = {{VSE}: Formal methods meet industrial needs},
  howpublished = {\url{http://www.dfki.uni-sb.de/vse/papers/ahlm98.ps.gz}},
  year         = {1998},
  annote       = {[to read] announced to appear in Software Tools for
                  Technology Transfer, 1998, Springer, Special issue
                  on mechanized theorem proving for technology. Contains
                  case study on ROBERTINO robot control system.}
}

@InProceedings{Beauquier:1998:TFD,
  author    = {Joffroy Beauquier and Sylvie {Dela\"et} and Shlomi Dolev
               and {S\'ebastien} Tixeuil},
  title     = {Transient fault detectors},
  booktitle = {Proceedings of the 12th International Symposium on
               DIStributed Computing (DISC'98)},
  pages     = {62--74},
  year      = {1998},
  volume    = {1499},
  series    = ser-LNCS,
  address   = {Andros, Greece},
  month     = sep,
  publisher = pub-SV,
  annote    = {[to read]}
}

@Article{Belli:1998:MHS,
  author  = {Fevzi Belli},
  title   = {{Methoden und Hilfsmittel f\"ur die systematische
             Pr\"ufung komplexer Software}},
  journal = j-IS,
  volume  = {21},
  number  = {6},
  pages   = {337--346},
  month   = dec,
  year    = {1998},
  annote  = {Vorstellung von konventionellen Testmethoden und
             Testwerkzeuge, Reviews etc.}
}

@MastersThesis{Bendrath:1998:CNR,
  author = {Ralf Bendrath},
  title  = {{Computer und die neue Rolle des Milit\"ars in den USA}},
  school = {Freie Universit\"at Berlin, Fachbereich Politische
            Wissenschaft},
  year   = {1998},
  type   = {Diploma thesis (in German)},
  month  = aug,
  annote = {Eine sehr detaillierte und quellenreiche Arbeit ueber den
            Einfluss von Computern auf das Verhaeltnis zwischen
            Militaer und der Zivilgesellschaft. Eingegangen wird auf
            die neue Rolle des Soldaten im Krieg (Vernetzung,
            Integration von Strategie und Taktik), Automatisierung der
            Verarbeitung von militaerischen Daten (KI als
            Schluesseltechnologie und deren Gefahren), der Begriff des
            Information Warfare (Ausweitung computermilitaerischer
            Operationen auf den zivilen Bereich, zunehmende
            Ununterscheidbarkeit von militaerischen und zivilen
            Operationen durch Praevention), Probleme des
            Sicherheitsbegriffs.}
}

@InProceedings{Chandra:1998:HFF,
  author    = {S. Chandra and P. M. Chen},
  title     = {How fail-stop are faulty programs?},
  pages     = {240--249},
  booktitle = pro-ftcs98,
  year      = {1998},
  month     = jun,
  annote    = {see proceedings}
}

@Article{Chase:1998:DGP,
  author   = "Craig M. Chase and Vijay K. Garg",
  title    = "Detection of global predicates: Techniques and their
              limitations",
  journal  = j-DC,
  volume   = "11",
  number   = "4",
  pages    = "191--201",
  year     = "1998",
  abstract = "We show that the problem of predicate detection in
              distributed systems is NP-complete.  In the past, efficient
              algorithms have been developed for special classes of predicates
              such as stable predicates, observer independent predicates, and
              conjunctive predicates. We introduce a class of predicates,
              semi-linear predicates, which properly contains all of the above
              classes. We first discuss stable, observer independent and
              semi-linear classes of predicates and their relationships with
              each other. We also study closure properties of these classes
              with respect to conjunction and disjunction.  Finally, we
              discuss algorithms for detection of predicates in these
              classes. We provide a non-deterministic detection algorithm for
              each class of predicate. We show that each class can be
              equivalently characterized by the degree of non-determinism
              present in the algorithm. Stable predicates are defined as those
              that can be detected by an algorithm with the most
              non-determinism.  All other classes can be derived by
              appropriately constraining the non-determinism in this
              algorithm.",
  annote   = "[to read]"
}


@InProceedings{Cristian:1998:TAS,
  author    = {Flaviu Cristian and Christof Fetzer},
  title     = {The timed asynchronous distributed system model},
  pages     = {140--149},
  booktitle = pro-ftcs98,
  year      = {1998},
  month     = jun,
  annote    = {The authors present a formal definition of a system
               model that is claimed to capture the current
               behavior of distributed systems like the
               Internet. The model makes the following assumptions:
               (1) processes have hardware clocks that have bounded
               drift rate, (2) processes communicate via unreliable
               datagram service with broadcast facility that has
               omission/performance failure semantics, (3)
               processes have crash/performance failure semantics,
               (4) there is no bound on load or failure rate, (5)
               services are usually timed, i.e., their
               specification prescribes a time interval within
               which some transitions will occur. Together with the
               optional extensions of stable storage and progress
               assumptions the authors claim that this model
               adequately reflects today's ``reality'', since
               important problems (like consensus etc.) are
               solvable in the Internet. The model also caters for
               network partitions (they are modeled by sufficiently
               many crash/omission failures).  The notion of a
               bounded drift rate is sufficient to implement a
               failure detector that detects untimeliness of
               processing or responses and thus can be used to
               build fail-aware services \cite{Fetzer:1997:FAA}.}
}



@Article{Echtle:1998:FMB,
  author  = {Klaus Echtle and Jo{\~a}o Gabriel Silva},
  title   = {{Fehlerinjektion -- ein Mittel zur Bewertung
              der Ma\ss{}nahmen gegen Fehler in komplexen
              Rechnersystemen}},
  journal = j-IS,
  year    = {1998},
  volume  = {21},
  number  = {6},
  pages   = {328--336},
  month   = dec,
  annote  = {Empirische Verl\"asslichkeitsbewertung im Gegensatz zu
    analytischer (vgl. \cite{Thurner:1998:VKS}). Englische Referenz ist
    \cite{Hsueh:1997:FIT}. Die Autoren beleuchten alle wesentlichen
    Aspekte moderner Fehlerinjektionstechniken und beschreiben die
    Zusammenh\"ange zu anderen Gebieten der Informatik. Z.B. die N\"ahe
    der Fehlerinjektion zum normalen Software-Test und zur formalen
    Verifikation. Letztere st\"o\ss{}t aber oft an Leistungsgrenzen,
    w\"ahrend Fehlerinjektion fast immer einen gegebenen Aufwandsrahmen
    ausf\"ullen kann. Zun\"achst werden Techniken der physikalischen
    Fehlerinjektion (Einwirkung auf Pins, Bestrahlung durch Schwerionen
    oder elektromagnetische Strahlung) und software-implementierte
    Fehlerinjektion besprochen. Letztere unterscheidet sich in Injektion
    auf der Komponenten-Ebene (direkte Ver\"anderung des Codes, direkte
    \"Anderung von Variablen, Programmz\"ahler oder Register) und auf
    der System-Ebene (Abschw\"achung von Annahmen \"uber andere
    unabh\"angige Prozesse an der Nachrichtenschnittstelle). Die
    Verl\"a\ss{}lichkeitsbewertung und die Fehlererfassung
    (engl. coverage) geschieht dann durch Auswahl geeigneter,
    realistischer Fehlerszenarien und einer ausreichenden Anzahl von
    Experimenten. Abschlie\ss{}end wird auf den Test von
    Fehlertoleranzverfahren in verteilten Systemen eingegangen: Der
    Begriff des Fehlerbereiches wird eingef\"uhrt um die \"ublichen
    Fehlermodelle (crash, Byzantine, etc.) zu beschreiben. Fehler
    k\"onnen dann an der Nachrichtenschnittstelle injiziert werden. Im
    Gegensatz zur formalen Verifikation bietet diese Technik den
    Vorteil, da\ss{} das System in einer ``realen'' Umgebung getestet
    wird. Ein paar g\"angige Fehlerinjektoren werden
    vorgestellt. Insgesamt ein guter \"Uberblick mit einer Art
    Markt\"ubersicht \"uber Fehlerinjektoren. Verwiesen wird bei der
    formalen Verifikation und Fehlermodellierung auf
    \cite{Echtle:1984:FSV}.}
}

@InProceedings{Gaertner:1998:EFR,
  author    = {Felix C. G{\"a}rtner and Henning Pagnia},
  title     = {Enhancing the fault tolerance of replication:
               another exercise in constrained convergence},
  booktitle = pro-ftcs98-fastabs,
  pages     = {29--30},
  year      = {1998},
  month     = jun
}

@InProceedings{Fetzer:1998:MCM,
  author    = {Christof Fetzer},
  title     = {The Message Classification Model},
  booktitle = {Proceedings of the 17th ACM Symposium on Principles of Distributed Computing},
  year      = {1998},
  month     = jun,
  address   = {Puerto Vallarta, Mexico},
  url       = {http://www.research.att.com/~christof/MCM},
  abstract  = {We propose a new system model for asynchronous
       distributed systems that we call the message classification
       model. Motivation for this model is its ability 1) to support a
       restricted but useful form of ``communication by time'' by
       classifying messages as either ``slow'' or ``fast'' but without
       incorporating neither real-time clocks nor ``time-outs'', and
       2) to describe transient and permanent network partitions. The
       message classification model allows the definition of different
       classes of classification schemes. To show that the model is
       indeed useful, we show how one can solve the consensus and the
       election problem for a certain class of message classification
       schemes.},
  annote    = {Contains a good overview and comparison of different
       models [to read]}
}




@TechReport{Gaertner:1998:FFT,
  author      = {Felix C. G{\"a}rtner},
  title       = {Fundamentals of fault tolerant distributed computing
                 in asynchronous environments},
  institution = {Darmstadt University of Technology},
  year        = {1998},
  number      = {TUD-BS-1998-02},
  address     = {Darmstadt, Germany},
  month       = jul,
  url         = {http://www.informatik.tu-darmstadt.de/BS/Gaertner/publications/TUD-BS-1998-02.ps},
  note        = {To appear in \textit{ACM Computing Surveys}, 31(1), March 1999.},
  annote      = {A generalization of Arora and Kulkarni's theory of
                 correction and detection \cite{Arora:1998:CDM} for
                 the asynchronous message passing model. The paper
                 first defines formally important terms like
                 redundancy, fault and fault tolerance. Then it shows
                 that fault tolerance cannot be achieved without
                 redundancy and reveals the two phases necessary in
                 fault tolerance: detection and correction. Detection
                 is generalized to possibility detection in
                 distributed systems and correction is generalized to
                 imposing a predicate on the system. Fundamental
                 methodologies of fault tolerant distributed
                 computing (like fail stop processors, state machine
                 approach, consensus) are shown to fit nicely into
                 the framework.}
}


@TechReport{Gaertner:1998:SFT,
  author      = {Felix C. G{\"a}rtner},
  title       = {Specifications for Fault Tolerance: {A} Comedy of Failures},
  institution = {Darmstadt University of Technology},
  year        = {1998},
  number      = {TUD-BS-1998-03},
  address     = {Darmstadt, Germany},
  month       = oct,
  url         = {http://www.informatik.tu-darmstadt.de/BS/Gaertner/publications/TUD-BS-1998-03.ps.gz},
  annote      = {[to write]}
}







@Article{Gamache:1998:WCS,
  author  = {Rod Gamache and Rob Short and Mike Massa},
  title   = {Windows {NT} clustering service},
  journal = j-IEEE-COMPUTER,
  volume  = {31},
  number  = {10},
  pages   = {55--62},
  month   = oct,
  year    = {1998},
  annote  = {A colourful article that praises the clustering service
    for high availability in NT 5.0. A service may be implemented on
    a cluster of servers (i.e., a set of identical machines) that all
    together transparently provide the service as if one single
    server were present. Hardware and software failures can be
    detected and failed applications can be restarted on other
    machines without interrupting the overall mode of operation.
    Several issues have not been touched yet because of ``technical
    complexity or schedule pressures'': these are active replication,
    process pairs, primary-backup, non-stop migration of processes
    and recovery of shared state between client and server. ``They
    will be added to future versions of the product.''}
}

@InProceedings{Garg:1998:DPD,
  author    = {Vijay K. Garg and J. Roger Mitchell},
  title     = {Distributed predicate detection in a faulty environment},
  booktitle = pro-icdcs98,
  year      = {1998},
  annote    = {The first real reference on general predicate
               detection in faulty environments. Several issues in
               this area are discussed: what type of failure
               detectors must be used? What if they produce false
               suspicions? How does this affect the validity of the
               global predicate? The authors present an algorithm to
               reliably detect a subclass of general predicates in an
               asynchronous message-passing environment subject to
               process crashes, message loss and channel crashes. The
               type of predicates they detect are set-decreasing and
               conjunctive. Set-decreasing means that whenever it
               holds for a set $S$ of processes, then it also holds
               for a set $S'\subseteq S$. Conjunctive means that it
               can be written as the conjunction of local predicates
               and send-monotonic channel predicates. A
               send-monotonic channel predicate is one that, if it is
               false, cannot be made true merely by sending
               messages. The algorithm is based on one by Hurfin,
               Mizuno, Raynal and Singhal \cite{Hurfin:1996:ODC} for
               detecting conjunctions of local predicates. Every
               process acts as a Monitor process and control messages
               are piggybacked on application messages. The
               application must ensure that eventually every process
               sends a message to every neighbour. Predicate
               detection is performed by constructing the lattice of
               consistent global states starting from an ``earliest''
               state at every process. Nodes that are suspected to
               have failed are not inspected for predicate
               evaluation. This is okay for this special type of
               predicates. The failure detector used satisfies weak
               completeness and infinitely often accuracy, meaning
               that every correct process is never permanently
               suspected. This is a weaker failure detector than the
               ``eventually weak'' failure detector of
               \cite{Chandra:1996:UFD}.}
}



@InProceedings{Garg:1998:IFD,
  author    = {Vijay K. Garg and J. Roger Mitchell},
  title     = {Implementable failure detectors in asynchronous systems},
  booktitle = {Proc. 18th Conference on Foundations of Software
               Technology and Theoretical Computer Science},
  year      = {1998},
  OPTeditor = {V. Arvind and R. Ramanujam},
  number    = {1530},
  series    = ser-LNCS,
  address   = {Chennai, India},
  month     = dec,
  publisher = pub-SV,
  url       = {http://maple.ece.utexas.edu/TechReports/1998/TR-PDS-1998-004.ps.Z}
}

@Book{Gertler:1998:FDD,
  author    = "J. Gertler",
  title     = "Fault Detection and Diagnosis in Engineering Systems",
  year      = 1998,
  publisher = "Marcel Dekker",
  address   = "New York",
  annote    = "[Angabe von Armin]"
}

@Article{Grosspietsch:1998:FKN,
  author  = {Karl-Erwin Gro{\ss}pietsch and Erik Maehle},
  title   = {{Fehlerbehandlung in komplexen nebenl\"aufigen
              Systemen}},
  journal = j-IS,
  year    = {1998},
  volume  = {21},
  number  = {6},
  pages   = {347--355},
  month   = dec,
  annote  = {Konzentriert sich auf Fehlertoleranzma\ss{}nahmen zur
    Wahrung von bestimmten Systemtopologien (Array, Baum, etc.).
    Stichworte: dynamische Redundanz, Rekonfiguration,
    fehlertolerantes Routing, Recovery.}
}

@InCollection{Hohl:1998:TLB,
  author    = "F. Hohl",
  title     = "Time Limited Blackbox Security: Protecting Mobile
               Agents from Malicious Hosts",
  pages     = "92--113",
  booktitle = "Mobile Agents and Security",
  crossref  = "Vigna:1998:MAS",
  annote    = {referenz von Uwe Wilhelm}
}

%%% NOTE(review): the author's surname is spelled ``Hoffman''; the
%%% citation tag keeps its original spelling so existing \cite
%%% commands continue to resolve.
@Book{Hoffmann:1998:DMD,
  author    = {Paul Hoffman},
  title     = {{Der Mann, der die Zahlen liebte}},
  publisher = {Ullstein},
  year      = {1998},
  annote    = {Biographie von Paul Erd{\H{o}}s.}
}


@InProceedings{Hurfin:1998:CAS,
  author    = {Michel Hurfin and Achour Most{\'e}faoui and Michel Raynal},
  title     = {Consensus in asynchronous systems where processes
               can crash and recover},
  booktitle = {Proceedings of the 17th IEEE Symposium on Reliable
               Distributed Systems (SRDS'98)},
  pages     = {280--286},
  year      = {1998},
  address   = {West Lafayette, Indiana},
  month     = oct,
  publisher = pub-IEEE,
  annote    = {Hinweis aus FG Fehlertolerierende Rechnersysteme
               Mitteilungen, Maerz 1999. Previously a
               Technical Report \cite{Hurfin:1997:CAS}.}
}

@InProceedings{Hutter:1998:VSE,
  author    = "Dieter Hutter and Heiko Mantel and Georg Rock and
               Werner Stephan and Andreas Wolpers and
               Michael Balser and Wolfgang Reif and
               Gerhard Schellhorn and Kurt Stenzel",
  title     = "{VSE:} {Controlling} the Complexity in Formal
               Software Developments",
  booktitle = "Proceedings of the International Workshop on
               Applied Formal Methods",
  address   = "Boppard, Germany",
  year      = 1998,
  annote    = "overview over VSE-II."
}



@Article{Kaiser:1998:EDV,
  author  = {J{\"o}rg Kaiser and Edgar Nett},
  title   = {{Echtzeitverhalten in dynamischen, verteilten Systemen}},
  journal = j-IS,
  year    = {1998},
  volume  = {21},
  number  = {6},
  pages   = {356--365},
  month   = dec,
  annote  = {Behandelt den Faktor ``Zeit'' in fehlertoleranten
      verteilten Systemen. Dabei sind zwei Aspekte von Bedeutung:
      Kommunikation und Scheduling. Kommunikation in Echtzeitsystemen
      mu\ss{} (1) Vorhersagbarkeit und (2) Kooperation
      gew\"ahrleisten. (1) Vorhersagbarkeit bedeutet eine obere Schranke
      auf Nachrichtenverz\"ogerung und eine Garantie von Eigenschaften
      unter Spitzenlast. Bei ersterem benutzt man reservierungsbasierte
      Verfahren (TDMA, braucht globale Zeit) und token-basierte
      (token-Ring, braucht keine globale Zeit). Ungeeignet ist Ethernet,
      obwohl darauf auch andere Verfahren implementiert werden
      k\"onnen. ATM ist eine Mischform. Kommunikationsfehler entstehen
      in der Wertedom\"ane und der Zeitdom\"ane. Sie werden in
      Fehlersemantiken wie omission oder crash beschrieben und m\"ussen
      toleriert werden. (2) Kooperation bedeutet Ordnung auf Nachrichten
      und Mitgliedschaft. In Echtzeitsystemen mu\ss{} man irgendwie Zeit
      mitspezifizieren. Es kann eine globale, synchronisierte Zeit
      angenommen werden (synchrone, eng synchronisierte Systeme) oder es
      werden zeitliche Systemannahmen lokal \"uber einen Timeout
      realisiert (zeitgesteuerte, asynchrone (engl. timed asynchronous),
      lose synchronisierte Systeme). Im eng synchronisierten Fall
      gen\"ugt Nachrichtendiffusion bei ausreichender Redundanz. In
      asynchronen Systemen braucht man eine Best\"atigung. Unter Hinweis
      auf \cite{Fischer:1985:IDC} wird bemerkt, da\ss{} irgendwelche
      Zeitannahmen (und seien sie nur unzuverl\"assig
      \cite{Chandra:1996:UFD}) ben\"otigt werden, um Konsens zu
      erzielen, auf welche Nachrichten sich noch zu warten lohnt und
      welche verloren gingen. Diese Zeitannahmen werden als
      Gleichm\"a\ss{}igkeit (steadyness) und Laufzeitvarianz (tightness)
      bezeichnet \cite{Verissimo:1993:RTC}. Anschlie\ss{}end wird auf
      Schedulingverfahren eingegangen. Als Anwendungsbeispiel wird die
      GMD-Snake Roboterschlange beschrieben.}
}

@Article{Karat:1998:GRU,
  author  = {Clare-Marie Karat},
  title   = {Guaranteeing Rights for the User},
  journal = j-CACM,
  year    = {1998},
  volume  = {41},
  number  = {12},
  pages   = {29--31},
  month   = dec,
  annote  = {Contains a ``user's bill of rights'' containing
     such items as ``the user is always right'' and ``the user has
     the right to a system that performs exactly as promised''.
     This is meant as a challenge to the computer industry to change
     its current view and points to the problem that dependency on
     hard- and software tends to become bigger as maintaining
     personnel and the industry are able to exploit their sole
     understanding of how things work.}
}

@PhdThesis{Kekkonen:1998:RFA,
  author = {Synn{\"o}ve Kekkonen},
  title  = {{R\'esistance} aux {Fautes} dans les {Algorithmes}
            {R\'epartis}: {Auto-Stabilisation} et {Tol\'erance}
            aux {Fautes}},
  school = {{Universit\'e} de Paris-Sud, France},
  year   = {1998},
  annote = {English title is: ``On Failure Resilience of
            Distributed Protocols: Self-Stabilization and
            Fault-Tolerance.'' Synn{\"o}ve investigates the
            (im)possibility of achieving reliability in the
            presence of systemic and process failures much in
            the tradition of Anagnostou and Hadzilacos
            \cite{Anagnostou:1993:TTP}. Failures are modeled as
            state transitions in the tradition of Arora and
            Gouda \cite{Arora:1993:CCF} and there are hints to
            defining a failure model as a program
            ``augmentation''. The thesis is developed in three
            stages: first, there is an elaborate chapter on
            modelling distributed systems as transition systems
            and defining/proving fault tolerance properties on
            them. Second, the self-stabilization approach is
            used to build stabilizing failure detectors and to
            solve torus orientation in anonymous
            networks, where the non-terminating nature of the
            self-stabilization paradigm interfaces well with
            the impossibility of a terminating solution for the
            problem. Third, the possibility of simultaneous
            resilience to process and systemic failures is
            investigated. Kekkonen proves a main impossibility
            result: if a problem is $k$-fault-sensitive in an
            asynchronous $(j,k)$-restrictable network subject to
            $k>0$ process crashes, then there exists no
            $k$-fault-tolerant self-stabilizing solution to the
            problem. A network is $(j,k)$-restrictable if some
            subnetwork of $j$ nodes can be replaced by a network
            of $k$ nodes without changing the ``interface''
            structure (e.g., replacing 5 successive nodes in a
            ring by a single one). A problem is
            $k$-fault-sensitive for a specific network if there
            is a $(j,k)$-restriction of the network and the
            protocol would reach different solutions depending
            on whether these $j$ processes are alive or $k$
            processes have crashed. This is an extension of the
            result of Anagnostou and Hadzilacos
            \cite{Anagnostou:1993:TTP} and their notion of
            failure sensitivity. Examples of $k$-fault-sensitive
            problems are computing the size of a ring and the
            $c$ coloring problem on rings. Examples of
            fault-insensitive problems are unique naming,
            non-trivial eventual consensus and ring
            orientation. A heuristic for finding out whether a
            problem is fault-insensitive or not is to assume that
            the problem can be solved, and then to compare the
            set of legitimate states of systems on different
            restrictions of the original network. If they do not
            differ, then the problem is
            fault-insensitive. Overall this is a very thorough
            and concise thesis, originally written and defended
            in French.}
}

%%% NOTE(review): the annotation below is still stored in an
%%% OPT-prefixed field, which BibTeX styles ignore; rename it to
%%% annote once the bibliographic data has been polished.
@InProceedings{Kreitz:1998:PED,
  author    = {Christoph Kreitz and Mark Hayden and Jason Hickey},
  title     = {A proof environment for the development of group
               communication systems},
  editor    = {H. Kirchner},
  series    = {Lecture Notes in Artificial Intelligence},
  booktitle = {15th International Conference on Automated Deduction},
  year      = {1998},
  publisher = pub-SV,
  OPTannote = {Ensemble is a group communication environment in the
               tradition of Isis and written in OCaml, a language
               similar to ML and thus well suited to be manipulated
               with NuPRL. The authors show how to import Ensemble
               code into NuPRL, verify certain aspects of a
               specification and export the code again for
               execution. Fault-tolerance is added by using failure
               detectors and focus is put on safety
               requirements. Timed I/O automata are used as the
               basis for formal reasoning about distributed systems.
               [bibliographic data needs polish!]}
}


@Unpublished{Kreitz:1998:SWL,
  author = {Christoph Kreitz},
  title  = {``{\textit{Safety}} ist wichtig, {\textit{liveness}}
            sieht man.''},
  year   = {1998},
  month  = mar,
  note   = {Personal communication.},
  annote = {Annotation during a talk on the Ensemble system at
            TU Darmstadt, March 12th, 1998, concerning a proof of a
            safety property. Liveness was up to that time of no
            concern in the project \cite{Kreitz:1998:PED}. See also
            the paper on Ariane 5 \cite{Dega:1996:RMA}, which
            supports this claim.}
}


@Article{Kshemkalyani:1998:NSC,
  author  = {Ajay D. Kshemkalyani and Mukesh Singhal},
  title   = {Necessary and Sufficient Conditions on Information for
             Causal Message Ordering and their Optimal
             Implementation},
  journal = j-DC,
  volume  = {11},
  pages   = {91--111},
  year    = {1998},
  annote  = {[to read]}
}


@InProceedings{Lamport:1998:CWM,
  author    = "Leslie Lamport",
  title     = "Composition: {A} way to make proofs harder",
  editor    = "Willem-Paul de Roever and Hans Langmaak and Amir Pnueli",
  booktitle = "Compositionality: The Significant Difference
               (Proceedings of the COMPOS'97 Symposium)",
  series    = ser-LNCS,
  number    = "1536",
  publisher = pub-SV,
  pages     = "402--423",
  year      = 1998,
  annote    = "Good title - good paper. Argues that compositionality
               makes proofs grow fast and that the additional effort
               is substantial if no automation is used. Gives an
               example."
}


@TechReport{Liu:1998:SVF,
  author      = {Zhiming Liu and Mathai Joseph},
  title       = {Specification and verification of fault-tolerance,
                 timing and scheduling},
  institution = {Department of Mathematics and Computer Science,
                 University of Leicester, U.K.},
  year        = {1998},
  number      = {1998/5},
  annote      = {Accepted at ACM TOPLAS. Extends earlier work of Liu and
    Joseph \cite{Liu:1992:TPF,Liu:1993:SVR,Liu:1994:SDF,Liu:1996:VFR}
    to transformational reasoning about fault-tolerant real-time
    systems. The formalism used is TLA and only safety properties of
    programs are considered. The computational model and the way to
    reason about fault tolerant systems using refinement is
    essentially the same as in \cite{Liu:1996:VFR,Liu:1992:TPF} but is
    nicely summarized and brought into the TLA framework. The main
    body of the paper deals with timing and scheduling. These issues
    are introduced into the formalism by introducing lower and upper
    time bounds to actions and adding a real time clock (similar to
    \cite{Abadi:1994:OFR}). The global fault assumption must be
    extended to specify minimum time lengths in which faults are not
    repeated and by assuming that the scheduler is not subject to
    faults. Much interest is laid on timing feasibility meaning
    whether there exists a scheduler to schedule a program
    correctly. It is shown how to reason compositionally about
    programs combined with schedulers, thus abstracting away from any
    specific implementation or policy. This is exemplified by taking a
    fixed priority scheduling scheme from the literature and using it
    to show feasibility. Discussion of related work mostly covers
    scheduling work, while initial historical remarks also deal with
    formal methods in fault tolerance. A very good paper; can be seen
    as quintessence of Liu and Joseph's work over the last decade.}
}

%%% NOTE(review): volume corrected from 34 to 35 to agree with the
%%% year (IEEE Spectrum volume 35 is 1998) -- confirm against the
%%% printed issue.
@Article{Marcopulos:1998:FBC,
  author  = {Ted Marcopulos},
  title   = {Faster, better, cheaper space exploration},
  journal = {IEEE Spectrum},
  year    = {1998},
  volume  = {35},
  number  = {8},
  pages   = {68--74},
  month   = aug,
  annote  = {The author surveys NASA's recent attempts to apply
             commercial management and development schemes to
             their current space exploration programs. It turns
             out that there is a strive towards eliminating
             redundancy in large parts of the system because
             components are already reliable enough for unmanned
             space flight and redundancy is costly both in
             weight, dollars and software/hardware
             complexity. This is a good reference together with
             \cite{Dega:1996:RMA}.}
}



%%% NOTE(review): journal changed from the IEEE Computer macro to
%%% IEEE Spectrum; volume 35 / December 1998 matches Spectrum, whereas
%%% IEEE Computer's 1998 volume is 31 -- confirm against the printed
%%% issue.
@Article{Marcus:1998:WTD,
  author  = {Stephen J. Marcus},
  title   = {What to do about bolts from the blue},
  journal = {IEEE Spectrum},
  year    = {1998},
  volume  = {35},
  number  = {12},
  pages   = {34--41},
  month   = dec,
  annote  = {Fascinating report on the danger of the earth being hit
     by an asteroid and the issues involved. A large scale example of
     being able to tolerate severe faults by detection and
     correction.}
}

@InProceedings{Merritt:1998:FSO,
  author    = {Michael Merritt and Gadi Taubenfeld},
  title     = {Fairness of Shared Objects},
  booktitle = {Proceedings of the 12th International Symposium on
               DIStributed Computing (DISC'98)},
  pages     = {303--316},
  year      = {1998},
  series    = ser-LNCS,
  number    = {1499},
  month     = sep,
  address   = {Andros, Greece},
  annote    = {Here, fairness is not defined with respect to processes or
    schedulers, but with respect to accesses to distinct shared
    objects. This is a way of encapsulating fairness assumptions (and
    thus timing assumptions) into modules quite nicely. Four types of
    fair objects are considered: deadlock-free (if some process tries to
    access some object, eventually some process will succeed to access
    that object), starvation-free (if a process tries to access an
    object, then he will eventually succeed), bounded-waiting
    (deadlock-free and there is an (unknown?) upper bound $r$ on the
    number of times that some other process can access an object before
    another process wanting to access the object), $r$-bounded-waiting
    (deadlock-free and there is a fixed upper bound $r$ on the number of
    times other processes can succeed before myself). It turns out that
    deadlock-free objects are weaker than starvation-free objects (using
    starvation-free objects makes some problems solvable),
    starvation-free and bounded-waiting objects are ``similar'' and
    $r$-bounded-waiting objects are much stronger than bounded-waiting
    objects. A nice result shows that safety properties are immune to
    fairness assumptions (similar result is attributed to
    \cite{Alur:1997:TAA}). There's a good related work section discussing
    the relationship between time/fairness and system models.}
}



@InProceedings{Riordan:1998:CEP,
  author    = "J. Riordan and B. Schneier",
  title     = "A Certified E-Mail Protocol with No Trusted Third Party",
  booktitle = "Proceedings of the 13th Annual Computer Security Applications Conference",
  month     = dec,
  year      = 1998,
  annote    = "Presents the same protocol as \cite{Pagnia:1997:TMP}
               and relates it to the current Internet
               infrastructure. Get it at:
               http://www.counterpane.com/certified-email.html"
}

@InProceedings{Rothermel:1998:FPP,
  author    = {Kurt Rothermel and Markus Stra{\ss}er},
  title     = {A Fault-Tolerant Protocol for Providing the
               Exactly-Once Property of Mobile Agents},
  booktitle = {Proceedings of the 17th IEEE Symposium on Reliable
               Distributed Systems (SRDS'98)},
  pages     = {100--108},
  year      = {1998},
  month     = oct,
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, California},
  annote    = {[to read]}
}

@InCollection{Sander:1998:PMA,
  author    = "T. Sander and C. F. Tschudin",
  title     = "Protecting Mobile Agents Against Malicious Hosts",
  crossref  = "Vigna:1998:MAS",
  booktitle = "Mobile Agents and Security",
  annote    = {Angabe von Uwe Wilhelm}
}

@InProceedings{Sander:1998:TMC,
  author    = {T. Sander and C. F. Tschudin},
  title     = {Towards Mobile Cryptography},
  added-at  = {Wed Apr 8 11:17:26 1998},
  abstract  = {Mobile code technology has become a driving force for
               recent advances in distributed systems. The concept of
               mobility of executable code raises major security
               problems. In this paper we deal with the protection of
               mobile code from possibly malicious hosts. We
               conceptualize on the specific cryptographic problems
               posed by mobile code. We are able to provide a solution
               for some of these problems: We present techniques how
               to achieve ``non--interactive computing with encrypted
               programs'' in certain cases and give a complete
               solution for this problem in important instances. We
               further present a way how a agent might securely
               perform a cryptographic primitive, digital signing, in
               an untrusted execution environment. Our results are
               based on the use of homomorphic encryption schemes and
               function composition techniques.},
  online    = {http://www.icsi.berkeley.edu/~tschudin/ps/ieee-sp98.ps.gz},
  booktitle = {Proceedings of the {IEEE} Symposium on Research in
               Security and Privacy},
  address   = {Oakland, CA},
  year      = {1998},
  publisher = pub-IEEE,
  month     = may,
  OPTorganization = {{IEEE} Computer Society, Technical Committee on
               Security and Privacy},
  annote    = {interesting paper doing a significant step towards
               protecting mobile code from its host without requiring
               trusted hardware. currently only solutions for
               rationals/polynomial functions are outlined (but not
               yet for boolean circuits (equivalent to turing machines
               !)) and there is also still a need for secure
               birational functions to make the ideas work.}
}


@InProceedings{Schneider:1998:FAN,
  author    = {Steve Schneider},
  title     = {Formal Analysis of a Non-Repudiation Protocol},
  booktitle = {PCSFW: Proceedings of The 11th Computer Security
               Foundations Workshop},
  pages     = {54--65},
  year      = {1998},
  publisher = {IEEE Computer Society Press},
  annote    = {The author presents a formal analysis of Zhou/Gollmann
    fair non-repudiation protocol \cite{Zhou:1996:FNP} (which is in
    fact similar to the protocol of \cite{Pagnia:1999:EGP}). The
    formalism used is CSP \cite{Hoare:1984:CSP}. Apart from the rigor
    in which the protocol is modeled and proved, an interesting fact
    here is that the author also stumbles over the necessity of
    liveness in the specification (an aspect discussed in
    \cite{Pagnia:1999:IFE}): state can be ``imposed'' on a process by
    assuring that it is able to make a state change if the process
    wants to. This is formalized as the following liveness property:
    if process A wants to make a state change depending on the receipt
    of message m from the trusted third party, then A will eventually
    receive m. This implies that the trusted third party is
    continuously available and has m ready and waiting for delivery to
    A. In this paper, A queries the trusted third party. Consequently,
    reliable communication to the trusted authority must be assumed.
    Another interesting point is a `generates' relation between
    messages which is used in the proof. This reminds of the
    formalization of non-cooperative Byzantine faults
    \cite{Echtle:1999:UCB}.}
}

@Article{Schneier:1998:CDV,
  author  = {Bruce Schneier},
  title   = {Cryptographic design vulnerabilities},
  journal = j-COMPUTER,
  volume  = {31},
  number  = {9},
  pages   = {29--33},
  month   = sep,
  year    = {1998},
  annote  = {Briefly discusses the notions of detection and correction
             in the context of cryptography and security.}
}




%% The LNCS number is stored in "volume" (not "number"), matching the
%% Stoller:1998:ASB entry from the same proceedings.
@InProceedings{Siegel:1998:FVS,
  author    = {Michael Siegel},
  title     = {Formal verification of stabilizing systems},
  booktitle = {Proceedings of the 5th International Symposium on
               Formal Techniques in Real Time and Fault Tolerant Systems
               (FTRTFT'98)},
  editor    = {Anders P. Ravn and Hans Rischel},
  series    = ser-LNCS,
  volume    = {1486},
  address   = {Lyngby, Denmark},
  month     = sep,
  year      = {1998},
  publisher = pub-SV,
  annote    = {Describes a calculus to perform formal proofs of
               stabilizing algorithms. The environment consists of fair
               transition systems and temporal logic. Gives proof rules
               for composing and refining stabilizing systems.}
}

@InProceedings{Singhai:1998:SFI,
  author    = {Ashish Singhai and Swee-Boon Lim and Sanjay R. Radia},
  title     = {The {SunSCALR} framework for {Internet} Servers},
  booktitle = pro-ftcs98,
  pages     = {108--117},
  month     = jun,
  year      = {1998},
  annote    = {First available implementation of a self-stabilizing
               algorithm in an industrial product. Also an example
               for the applications of non-masking fault tolerance.}
}

%% The url field holds the raw URL; wrapping it in a URL macro is left
%% to the bibliography style.
@inproceedings{Stoller:1998:ASB,
  author    = {Scott D. Stoller and Fred B. Schneider},
  title     = {Automated Stream-Based Analysis of Fault-Tolerance},
  booktitle = {Formal Techniques in Real-Time and Fault-Tolerant Systems},
  series    = ser-LNCS,
  volume    = {1486},
  pages     = {113--122},
  address   = {Lyngby, Denmark},
  month     = sep,
  year      = {1998},
  publisher = pub-SV,
  url       = {http://ftp.cs.indiana.edu/pub/stoller/FTRTFT98-extended.ps.gz},
  annote    = {[to read]}
}



@InProceedings{Tarafdar:1998:AFC,
  author    = {Ashis Tarafdar and Vijay K. Garg},
  title     = {Addressing false causality while detecting predicates in
               distributed programs},
  booktitle = pro-icdcs98,
  pages     = {94--101},
  address   = {Amsterdam, The Netherlands},
  year      = {1998},
  url       = {http://www.ece.utexas.edu/~garg/dist/dcs98-ashis.ps.Z},
  annote    = {The general causality relation based on the happened
    before relation imposes a causal order on events which may be
    causally unrelated. For example, two successive events on one
    process are causally ordered by default, but this order may simply
    have been imposed by a random scheduler and the events are in fact
    events on two independent threads of the process. This is called
    `false causality' and has been a critique of the happened-before
    model of distributed computations. The authors present a way of
    extending the partial order model to ``split up'' the execution
    order of a process into multiple threads and actually treating
    independent events as such in the causality relation. This adds
    complexity to the problem and they show that it becomes NP
    complete (the original problem is NP complete already
    \cite{Chase:1998:DGP}). However, for a restricted class of
    predicates (weak conjunctive ones) they give an efficient
    algorithm to detect them. States that approaches to predicate
    detection fall into three classes: (1) snapshot based ones
    \cite{Chandy:1985:DSD} only suitable for stable predicates, (2)
    lattice construction based ones \cite{Cooper:1991:CDG} and (3)
    restriction based approaches like those of Garg.}
}

@InProceedings{Theel:1998:OPS,
  author    = {Oliver Theel and Felix C. {G\"artner}},
  title     = {On proving the stability of distributed algorithms:
               self-stabilization vs. control theory},
  booktitle = {Proceedings of the International Systems, Signals,
               Control, Computers Conference (SSCC'98), Durban,
               South Africa},
  editor    = {Vladimir B. Bajic},
  volume    = {III},
  pages     = {58--66},
  month     = sep,
  year      = {1998},
  annote    = {[to write ;-)]}
}





@Article{Thurner:1998:VKS,
  author  = {Erwin Thurner and Mario Dal Cin and Winfried
             {Schneewei\ss{}}},
  title   = {{Verl\"a\ss{}lichkeitsbewertung komplexer Systeme}},
  journal = j-IS,
  volume  = {21},
  number  = {6},
  pages   = {318--327},
  month   = dec,
  year    = {1998},
  annote  = {Deutsche Einf\"uhrung in Begriffe wie
    Zuverl\"assigkeit, mittlere Lebensdauer (MTTF), Ausfallrate,
    Sicherheit, MTBF, Verf\"ugbarkeit, sowie die Methoden
    Fehlerb\"aume, Markovketten und hybride Ans\"atze. Konzentration
    auf analytische Bewertungen, nicht auf experimentelle (f\"ur
    experimentelle siehe \cite{Echtle:1998:FMB}).}
}

@Book{Vigna:1998:MAS,
  editor    = {G. Vigna},
  title     = {Mobile Agents and Security},
  series    = ser-LNCS,
  volume    = {1419},
  publisher = pub-SV,
  address   = {Berlin},
  year      = {1998},
  annote    = {Angabe von Uwe Wilhelm}
}


@InProceedings{Voelzer:1998:VFT,
  author    = {Hagen {V\"olzer}},
  title     = {Verifying fault tolerance of distributed algorithms
               formally: {An} example},
  booktitle = {Proceedings of the International Conference on
               Application of Concurrency to System Design (CSD98)},
  pages     = {187--197},
  address   = {Fukushima, Japan},
  month     = mar,
  year      = {1998},
  publisher = pub-IEEE,
  annote    = {This paper investigates the fully mechanical
    verification of fault tolerant algorithms using the DAWN approach
    \cite{Weber:1997:DAW} which is based on Petri nets. The main point
    in doing so is to formally handle faults and fault models. This is
    done by distinguishing an (informal) fault model from a formal
    fault impact model specified by a Petri net. In this example,
    crash and omission faults are formalized by additional state
    transitions which are superimposed onto an algorithm for fault
    free executions. Additionally to a fault impact model, a ``rely''
    property belongs to the fault model. Such a property formalizes
    ``assumptions about the environment'' like the maximum number of
    faults that may occur, and it makes these assumptions exploitable
    by a proof. The example algorithm used is the SELF-2 fault
    diagnosis algorithm by Kuhl and Reddy. The paper shows the
    advantages of Petri nets in formulating and reasoning about
    distributed algorithms. The superimposition property of such nets
    makes the approach extremely useful for fault tolerant
    algorithms.}
}

%% The accented letter is an acute accent and is written as a BibTeX
%% special character ({\'a}) so name handling treats it as one letter.
@inproceedings{Wilhelm:1998:PTM,
  author    = {U. G. Wilhelm and L. Butty{\'a}n and S. Staamann},
  title     = {On the Problem of Trust in Mobile Agent Systems},
  booktitle = {Symposium on Network and Distributed System Security},
  pages     = {114--124},
  month     = mar,
  year      = {1998},
  publisher = {Internet Society},
  keywords  = {IMPORTANT; Security},
  annote    = {[to read]}
}

@Article{Aguilera:1999:UHF,
  author          = {Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg},
  title           = {Using the heartbeat failure detector for quiescent
                     reliable communication and consensus in
                     partitionable networks},
  journal         = {Theoretical Computer Science},
  volume          = {220},
  number          = {1},
  pages           = {3--30},
  day             = {06},
  month           = jun,
  year            = {1999},
  coden           = {TCSCDI},
  ISSN            = {0304-3975},
  bibdate         = {Mon Jul 19 22:22:41 MDT 1999},
  url             = {http://www.elsevier.com/cas/tree/store/tcs/sub/1999/220/1/3045.pdf},
  acknowledgement = ack-nhfb,
  annote          = {[to read]}
}

@TechReport{Aguilera:1999:WFD,
  author      = {Marcos Kawazoe Aguilera and Sam Toueg and Borislav
                 Deianov},
  title       = {On the Weakest Failure Detector for Uniform
                 Reliable Broadcast},
  institution = {Cornell University, Computer Science},
  number      = {TR99-1741},
  month       = apr # " 30,",
  year        = {1999},
  abstract    = {Uniform Reliable Broadcast (URB) is a communication
    primitive that requires that if a process delivers a message, then
    all correct processes also deliver this message. A recent PODC
    paper \cite{Halpern:1999:KAU} uses Knowledge Theory to determine
    what failure detectors are necessary to implement this primitive
    in asynchronous systems with process crashes and lossy links that
    are fair. In this paper, we revisit this problem using a different
    approach, and provide a result that is simpler, more intuitive,
    and, in a precise sense, more general.},
  annote      = {}
}

@Article{Benassi:1999:T,
  author          = {Paola Benassi},
  title           = {{TRUSTe}: An online privacy seal program},
  journal         = {Communications of the ACM},
  volume          = {42},
  number          = {2},
  pages           = {56--59},
  month           = feb,
  year            = {1999},
  coden           = {CACMA2},
  ISSN            = {0001-0782},
  bibdate         = {Fri Feb 5 07:01:55 MST 1999},
  url             = {http://www.acm.org:80/pubs/citations/journals/cacm/1999-42-2/p56-benassi/},
  acknowledgement = ack-nhfb,
  annote          = {TRUSTe is a trustmark or a seal which providers can
    put onto their web pages in order to indicate sound privacy
    practices. The trustmark is issued by an organization which checks
    the pages (\url{www.truste.org}). See also
    \cite{Reagle:1999:PPP}.}
}

%% The journal is given via the j-COMPUTER string macro, as in
%% Schneier:1998:CDV, so the journal name is spelled in one place only.
@Article{Billinghurst:1999:WDN,
  author          = {Mark Billinghurst and Thad Starner},
  title           = {Wearable Devices: New Ways to Manage Information},
  journal         = j-COMPUTER,
  volume          = {32},
  number          = {1},
  pages           = {57--64},
  month           = jan,
  year            = {1999},
  coden           = {CPTRB4},
  ISSN            = {0018-9162},
  bibdate         = {Fri Jan 15 16:17:58 MST 1999},
  url             = {http://www.computer.org/computer/co1999/r1057abs.htm;
                     http://dlib.computer.org/co/books/co1999/pdf/r1057.pdf},
  acknowledgement = ack-nhfb,
  annote          = {A thrilling and fascinating article on a somewhat
    underestimated branch of computer science. Computers can be
    incorporated into clothing, eyeglasses, can be worn around the
    neck, in a wristwatch, etc. Applications of wearable computers
    (also non-military) are given: navigation using augmented reality,
    wearable bar code scanners at UPS. The article also takes a shot
    at predicting what comes next: for example using augmented reality
    to do conferencing. Pointers to conferences, companies and
    research projects concerning wearables round up the article. For a
    market survey as of 2000 see \cite{Ditlea:2000:PCG}.}
}

%% The journal is given via the j-COMPUTER string macro, as in
%% Schneier:1998:CDV, so the journal name is spelled in one place only.
@Article{Boyle:1999:DYT,
  author  = {James M. Boyle and R. Daniel Resler and Victor L.
             Winter},
  title   = {Do You Trust Your Compiler?},
  journal = j-COMPUTER,
  volume  = {32},
  number  = {5},
  pages   = {65--73},
  month   = may,
  year    = {1999},
  url     = {http://www.computer.org/computer/co1999/r5065abs.htm;
             http://dlib.computer.org/co/books/co1999/pdf/r5065.pdf},
  annote  = {There are two problems involved when using formal
    methods to produce correct software: (1) coming up with an
    accurate formal specification of the problem, and (2) producing a
    correct implementation of the specification or verifying that a
    given implementation is correct regarding the specification. This
    paper addresses the second problem and uses buggy compilers to
    motivate it. Bugs in compilers are well-documented (see
    news:gnu.gcc.bug for example). The idea is to start with a high
    level code and apply correctness preserving transformations to it
    until a lower level code is reached. Denotational semantics are
    used to define `correctness preserving'. As an open research
    problem it is noted that producing code from safety and liveness
    specifications would be good.}
}

%% The month range is built from the standard month macros so that each
%% bibliography style can expand or abbreviate the names itself.
@InProceedings{Cardellini:1999:RAL,
  author    = {Valeria Cardellini and Michele Colajanni and Philip S. Yu},
  title     = {Redirection Algorithms for Load Sharing in Distributed
               Web-server Systems},
  booktitle = pro-icdcs99,
  editor    = {Mohamed G. Gouda},
  pages     = {528--535},
  month     = may # "/" # jun,
  year      = {1999},
  publisher = pub-IEEE,
  annote    = {[to read]}
}




%% Accented letters in names are written as BibTeX special characters
%% ({\~a}): a bare ~ at brace level 0 acts as a name-token separator and
%% would split the given name in two.
@Article{Carreira:1999:FIS,
  author  = {Jo{\~a}o Viegas Carreira and Diamantino Costa
             and Jo{\~a}o Gabriel Silva},
  title   = {Fault injection spot-checks computer system dependability},
  journal = {IEEE Spectrum},
  volume  = {36},
  number  = {8},
  pages   = {50--55},
  month   = aug,
  year    = {1999},
  annote  = {A good motivation and introduction to fault injection
    from a more hardware point of view than
    \cite{Hsueh:1997:FIT,Echtle:1998:FMB}. Contains terms Heisenbugs
    (a failure that is not reconstructable), and Bohrbugs (the
    opposite). Like \cite{Rushby:1994:CSP} states that attaching
    reliability figures to a system is problematic, even if the
    failure model is precisely fixed. States that there is research in
    Sematech, HP, Compaq and Stanford to collect real fault data and
    thus enable more realistic failure models.}
}

@Article{Cristian:1999:TAD,
  author   = {Flaviu Cristian and Christof Fetzer},
  title    = {The Timed Asynchronous Distributed System Model},
  journal  = {{IEEE} Transactions on Parallel and Distributed
              Systems},
  volume   = {10},
  number   = {6},
  month    = jun,
  year     = {1999},
  url      = {http://www-cse.ucsd.edu/users/cfetzer/MODEL/},
  language = {English},
  abstract = {We propose a formal definition for the timed
    asynchronous distributed system model. We present extensive
    measurements of actual message and process scheduling delays and
    hardware clock drifts. These measurements confirm that this model
    adequately describes current distributed systems such as a network
    of workstations. We also give an explanation of why practically
    needed services, such as consensus or leader election, which are
    not implementable in the time-free model, are implementable in the
    timed asynchronous system model.},
  annote   = {A revised version of \cite{Cristian:1998:TAS}.}
}

@InProceedings{Echtle:1999:UCB,
  author    = {Klaus Echtle and Asif Masum},
  title     = {Understanding Cooperative {Byzantine} Failures: A Novel
               Failure Classification to Enable Efficient
               Fault-Tolerant Protocols},
  booktitle = {Proceedings of the Annual IEEE Workshop on Fault-Tolerant
               Parallel and Distributed Systems (FTPDS'99)},
  address   = {San Juan, Puerto Rico, USA},
  month     = apr,
  year      = {1999},
  publisher = {Kluwer},
  annote    = {The authors present a unifying approach to modeling the
    common fault classes formally. This is the most general approach
    that I know of and in contrast to \cite{Gaertner:1998:SFT} seems
    to be easier to adapt to the common fault classes (and is able to
    derive new ones). The work contains a near-to-complete list of
    references to fault classification work and puts the terms fault,
    error and failure in a nice layered context (p. 2). The model
    consists of a set of $n$ components that can be separated into
    fault free and faulty ones. Components communicate by sending
    messages from some fixed message set. Sending and receipt of a
    message trigger events. An event is a tuple consisting of the
    message, the event type (send/receive) and a time tag, which
    specifies the global point in continuous real time in which the
    event occurs. Component behaviors can now be described as event
    sets, which through the time tag implicitly define a single (?!)
    sequence of events (not a set of sequences?). A specification
    $S_i$ for component $i$ is a set of correct input/output tuples,
    i.e. a relation over input sequences and output sequences. Failure
    modes are defined in a functional way: a failure mode identifies
    sets of behaviors which a component may exhibit following the
    occurrence of a set of receive events. Now it is possible to
    define the different ``failure mode functions'' for correct
    behavior, fail-silent, fail-omission, message loss, message
    duplication etc. by changing tags in message sets or message sets
    themselves. To define failures affecting code integrity (e.g.
    altered messages) the authors define the concept of a failure
    capability $C_i$ for component $i$. This can be seen as a degraded
    component specification, i.e. is the set of behaviors allowed by
    $i$ if it is faulty. Using this construct it is possible to derive
    a rich set of distinctive failure modes visualized in Fig. 10. As
    a further novelty, the authors introduce a new failure mode, that
    of non-cooperative Byzantine. This is where no malicious
    cooperation takes place between faulty nodes. This is formalized
    along the idea that such behavior must be based on either (1)
    malicious treason (e.g., revealing a secret key) or (2) malicious
    delegation (e.g. some node asks another node to sign a message).
    Malicious cooperation is then defined (on p. 19) as ``increasing
    the failure capability by the receipt of a message'' (see also the
    `generates' relation of \cite{Schneider:1998:FAN}).
    Non-cooperative behavior is defined as the complement of malicious
    cooperation. It is nice to have different types of Byzantine
    behaviors because this can result in protocols that are more
    efficient. This is shown by example. Overall a formal, but very
    rewarding paper which can also be used as an overview over the
    state of the art in failure classification. See also
    \cite{Echtle:2000:FFM} and Asif's thesis.}
}



@InProceedings{Essame:1999:PPA,
  author    = {Didier Essame and Jean Arlat and David Powell},
  title     = {Padre: {A} protocol for asymmetric duplex redundancy},
  booktitle = {Proceedings of the Seventh IFIP International Working
               Conference on Dependable Computing for Critical
               Applications},
  address   = {San Jose, USA},
  month     = jan,
  year      = {1999},
  annote    = {[to get] uses the timed asynchronous model to build a
               fully automated train control system, cited in
               \cite{Cristian:1999:TAD}.}
}

@InProceedings{Felber:1999:FDF,
  author    = {Pascal Felber and Xavier D\'efago and Rachid
               Guerraoui and P. Oser},
  title     = {Failure Detectors as First Class Objects},
  booktitle = {Proceedings of the International Symposium on
               Distributed Objects and Applications (DOA'99)},
  pages     = {132--141},
  address   = {Edinburgh, Scotland},
  month     = sep,
  year      = {1999},
  annote    = {[to get]}
}

@Article{Felber:1999:POD,
  author  = {Pascal Felber and Rachid Guerraoui and Mohamed E.
             Fayad},
  title   = {Putting {OO} distributed programming to work},
  journal = {Communications of the ACM},
  volume  = {42},
  number  = {11},
  pages   = {97--101},
  month   = nov,
  year    = {1999},
  url     = {http://www.acm.org/pubs/articles/journals/cacm/1999-42-11/p97-felber/p97-felber.pdf;
             http://www.acm.org/pubs/citations/journals/cacm/1999-42-11/p97-felber/},
  annote  = {Discusses different approaches to specify, model and
    implement failure detectors. Distinguishes the push model, pull
    model and the dual model (combination of push and pull). Similar
    title is \cite{Felber:1999:FDF}. Failure detector implementations
    also discussed in \cite{Sergent:1999:FDI}.}
}

@InProceedings{Fetzer:1999:CTA,
  author    = {Christof Fetzer},
  title     = {A comparison of timed asynchronous systems and
               asynchronous systems with failure detectors},
  booktitle = {Proceedings of the Third European Research Seminar
               on Advances in Distributed Systems (ERSADS'99)},
  pages     = {109--118},
  address   = {Madeira Island, Portugal},
  month     = apr,
  year      = {1999},
  annote    = {[to write]}
}

@Article{Gabber:1999:CYA,
  author          = {Eran Gabber and Phillip B. Gibbons and David M.
                     Kristol and Yossi Matias and Alain Mayer},
  title           = {Consistent, yet anonymous, {Web} access with {LPWA}},
  journal         = {Communications of the ACM},
  volume          = {42},
  number          = {2},
  pages           = {42--47},
  month           = feb,
  year            = {1999},
  coden           = {CACMA2},
  ISSN            = {0001-0782},
  bibdate         = {Fri Feb 5 07:01:55 MST 1999},
  url             = {http://www.acm.org:80/pubs/citations/journals/cacm/1999-42-2/p42-gabber/},
  acknowledgement = ack-nhfb,
  annote          = {The LPWA is the Lucent Personalized Web Assistant, a
    tool which helps you manage different pseudonyms and thus manage
    anonymity on the web. Related articles are about Crowds
    \cite{Reiter:1999:AWT}, onion routing \cite{Goldschlag:1999:OR},
    and \cite{Reagle:1999:PPP,Benassi:1999:T}.}
}



@InProceedings{Gaertner:1999:AFD,
  author    = {Felix C. {G\"artner} and Henning Pagnia and Holger Vogt},
  title     = {Approaching a formal definition of fairness in
               electronic commerce},
  booktitle = {Proceedings of the International Workshop on Electronic
               Commerce (WELCOM'99)},
  pages     = {354--359},
  address   = {Lausanne, Switzerland},
  month     = oct,
  year      = {1999},
  publisher = pub-IEEE,
  annote    = {[to write]}
}



@Unpublished{Gaertner:1999:DR,
  author = {Felix C. {G\"artner} and Hagen {V\"olzer}},
  title  = {Defining Redundancy in Fault-Tolerant Computing},
  year   = {1999},
  note   = {unpublished manuscript}
}

%% The booktitle is spelled exactly as in Fetzer:1999:CTA, which comes
%% from the same proceedings.
@InProceedings{Gaertner:1999:ESD,
  author    = {Felix C. {G\"artner}},
  title     = {An exercise in systematically deriving fault-tolerance
               specifications},
  booktitle = {Proceedings of the Third European Research Seminar
               on Advances in Distributed Systems (ERSADS'99)},
  address   = {Madeira Island, Portugal},
  month     = apr,
  year      = {1999},
  annote    = {Shorter Version of \cite{Gaertner:1999:ESDFS}.}
}

@TechReport{Gaertner:1999:ESDFS,
  author      = {Felix C. {G\"artner}},
  title       = {An exercise in systematically deriving fault-tolerance
                 specifications},
  institution = {Department of Computer Science, Darmstadt University
                 of Technology},
  number      = {TUD-BS-1999-01},
  address     = {Darmstadt, Germany},
  month       = mar,
  year        = {1999},
  OPTnote     = {Available at http://www.informatik.tu-darmstadt.de/BS/Gaertner/publications/TUD-BS-1999-01.ps.gz}
}



@Article{Gaertner:1999:FFT,
  author  = {Felix C. {G\"artner}},
  title   = {Fundamentals of fault-tolerant distributed computing in
             asynchronous environments},
  journal = j-ACM-COMP-SURVEYS,
  volume  = {31},
  number  = {1},
  pages   = {1--26},
  month   = mar,
  year    = {1999},
  annote  = {updated version of \cite{Gaertner:1998:FFT}.}
}

@TechReport{Gaertner:1999:FUF,
  author      = {Felix C. {G\"artner} and Armin Wolfram},
  title       = {{Fehlererkennung und Fehlerdiagnose f\"ur
                 verl\"a\ss{}liche Systeme -- Automatisierungstechnik
                 vs.~verteilte Systeme}},
  institution = {Department of Computer Science, Darmstadt University
                 of Technology},
  number      = {TUD-BS-1999-03},
  address     = {Darmstadt, Germany},
  month       = jul,
  year        = {1999}
}

@InProceedings{Gaertner:1999:SLD,
  author    = {Felix C. {G\"artner} and Henning Pagnia},
  title     = {Self-stabilizing Load Distribution for Replicated
               Servers on a Per-Access Basis},
  booktitle = pro-wss99,
  editor    = {Anish Arora},
  pages     = {102--109},
  address   = {Austin, TX},
  month     = jun,
  year      = {1999},
  publisher = pub-IEEE,
  annote    = {A self-stabilizing extension to existing load
    balancing schemes (such as
    \cite{Arora:1997:OCC,Arora:1995:ECC,Gronning:1990:SDD}) to allow
    fine grained load distribution based on redirection. Pointers to
    commercial realizations appear in \cite{Cardellini:1999:RAL}.}
}



%% The note points at the published journal version (entry
%% Gaertner:1999:TAS) instead of announcing it as forthcoming, and
%% carries no hard-coded font commands.
@TechReport{Gaertner:1999:STA,
  author      = {Felix C. {G\"artner}},
  title       = {A survey of transformational approaches to the
                 specification and verification of fault-tolerant
                 systems},
  institution = {Department of Computer Science, Darmstadt University
                 of Technology},
  number      = {TUD-BS-1999-04},
  address     = {Darmstadt, Germany},
  month       = apr,
  year        = {1999},
  note        = {Journal version published in Journal of Universal
                 Computer Science (J.UCS) 5(10):668--692, October 1999,
                 special issue on ``Dependability Evaluation and
                 Assessment''.},
  OPTannote   = {Journal version \cite{Gaertner:1999:TAS}.}
}



@Article{Gaertner:1999:TAS,
  author  = {Felix C. {G\"artner}},
  title   = {Transformational Approaches to the Specification and
             Verification of Fault-Tolerant Systems: {Formal}
             Background and Classification},
  journal = {Journal of Universal Computer Science (J.UCS)},
  volume  = {5},
  number  = {10},
  pages   = {668--692},
  month   = oct,
  year    = {1999},
  note    = {Special Issue on Dependability Evaluation and Assessment},
  annote  = {Prior technical report \cite{Gaertner:1999:STA}.}
}

@Article{Glass:1999:RST,
  author          = {Robert L. Glass},
  title           = {The realities of software technology payoffs},
  journal         = {Communications of the ACM},
  volume          = {42},
  number          = {2},
  pages           = {74--79},
  month           = feb,
  year            = {1999},
  coden           = {CACMA2},
  ISSN            = {0001-0782},
  bibdate         = {Fri Feb 5 07:01:55 MST 1999},
  url             = {http://www.acm.org:80/pubs/citations/journals/cacm/1999-42-2/p74-glass/},
  acknowledgement = ack-nhfb,
  annote          = {Glass studies which new software engineering
    practices have turned out to pay off in the long run. These
    technologies are: structured techniques, fourth generation
    languages, CASE, formal methods, cleanroom methodology, process
    models, object-orientation. Especially interesting to me is the
    discussion of formal methods. Glass says that it has been little
    used because it still is largely underdefined and underevaluated.
    Only one study has brought forward hard numbers
    \cite{Ralston:1991:FMH}.}
}

@Article{Goldschlag:1999:OR,
  author          = {David Goldschlag and Michael Reed and Paul Syverson},
  title           = {Onion routing},
  journal         = {Communications of the ACM},
  volume          = {42},
  number          = {2},
  pages           = {39--41},
  month           = feb,
  year            = {1999},
  coden           = {CACMA2},
  ISSN            = {0001-0782},
  bibdate         = {Fri Feb 5 07:01:55 MST 1999},
  url             = {http://www.acm.org:80/pubs/citations/journals/cacm/1999-42-2/p39-goldschlag/},
  acknowledgement = ack-nhfb,
  annote          = {See \url{www.onion-router.net}. Other methods to
    achieve privacy on the net are discussed in other articles from
    this CACM issue
    \cite{Reiter:1999:AWT,Gabber:1999:CYA,Reagle:1999:PPP} and
    \cite{Benassi:1999:T}.}
}



@Article{Grimley:1999:PIA,
  author =       {Michael J. Grimley and Brian D. Monroe},
  title =        {Protecting the integrity of agents: {An} exploration
                  into letting agents loose in an unpredictable world},
  journal =      {Crossroads, the ACM student magazine},
  year =         {1999},
  volume =       {5},
  number =       {4},
  pages =        {10--17},
  annote =       {A good and brief survey introduction into the issues
                  of security of agents (both protecting agents from
                  their execution environments and vice versa), with lots
                  of good references. A good starting point.}
}



@InProceedings{Halpern:1999:KAU,
  author    = {Joseph Y. Halpern and Aleta Ricciardi},
  title     = {A knowledge-theoretic analysis of uniform distributed
               coordination and failure detectors},
  booktitle = pro-podc99,
  year      = {1999},
  pages     = {73--82},
  annote    = {A discussion appears in \cite{Aguilera:1999:WFD}.}
}

@Article{Hennessy:1999:FSR,
  author =       "John Hennessy",
  title =        "The Future of Systems Research",
  journal =      "Computer",
  volume =       "32",
  number =       "8",
  pages =        "27--33",
  month =        aug,
  year =         "1999",
  url =          "http://www.computer.org/computer/co1999/r8027abs.htm;
                 http://dlib.computer.org/co/books/co1999/pdf/r8027.pdf",
  annote = "A speculation on what will be and what should be the
  subject of research and development in systems in the next
  years. Interesting is that the author explicitly mentions
  availability as a key issue and fault-tolerance as a key
  mechanism. However, fault tolerance research must focus more on
  gradual and dynamic mechanisms, not directly hiding fault evidence
  but helping maintain availability, for example like in the RAID
  approach \cite{Patterson:1988:CRA}. A good reference for the
  importance of fault tolerance research."
}




@Article{Hoffman:1999:PCL,
  author =       {Forrest Hoffman and William Hargrove},
  title =        {Parallel computing with {Linux}},
  journal =      {Crossroads, the ACM student magazine},
  year =         {1999},
  volume =       {6},
  number =       {1},
  pages =        {23--27},
  annote = {Gives a practical guide to installing a Beowulf parallel
  computing system at your home. Gives a lot of online references to
  more information and is a good starting point for Beowulf projects.}
}

@Article{Hurfin:1999:SFA,
  author =       "Michel Hurfin and Michel Raynal",
  title =        "A Simple and Fast Asynchronous Consensus Protocol
                 Based on a Weak Failure Detector",
  journal =      j-DC,
  volume =       "12",
  number =       "4",
  pages =        "209--223",
  year =         "1999",
  abstract = "The Consensus problem is a fundamental paradigm for
        fault-tolerant asynchronous systems. It abstracts a family of
        problems known as Agreement (or Coordination) problems. Any
        solution to consensus can serve as a basic building block for
        solving such problems (e.g., atomic commitment or atomic
        broadcast). Solving consensus in an asynchronous system is not
        a trivial task: it has been proven (1985) by Fischer, Lynch
        and Paterson that there is no deterministic solution in
        asynchronous systems which are subject to even a single crash
        failure. To circumvent this impossibility result, Chandra and
        Toueg have introduced the concept of unreliable failure
        detectors (1991), and have studied how these failure detectors
        can be used to solve consensus in asynchronous systems with
        crash failures. This paper presents a new consensus protocol
        that uses a failure detector of the class $\Diamond{\cal
        S}$. Like previous protocols, it is based on the rotating
        coordinator paradigm and proceeds in asynchronous
        rounds. Simplicity and efficiency are the main characteristics
        of this protocol. From a performance point of view, the
        protocol is particularly efficient when, whether failures
        occur or not, the underlying failure detector makes no mistake
        (a common case in practice). From a design point of view, the
        protocol is based on the combination of three simple
        mechanisms: a voting mechanism, a small finite state automaton
        which manages the behavior of each process, and the
        possibility for a process to change its mind during a round.",
  annote =       "Must be noted as one of the standard consensus
        protocols among \cite{Chandra:1996:UFD} and
        \cite{Schiper:1997:ECA,Schiper:1997:EEC}."
}


@Article{Jajodia:1999:SIW,
  author =       "Sushil Jajodia and Paul Ammann and Catherine D.
                 McCollum",
  title =        "Surviving Information Warfare Attacks",
  journal =      "Computer",
  volume =       "32",
  number =       "4",
  pages =        "57--63",
  month =        apr,
  year =         "1999",
  coden =        "CPTRB4",
  ISSN =         "0018-9162",
  bibdate =      "Thu Apr 1 07:09:15 MST 1999",
  url =          "http://www.computer.org/computer/co1999/r4057abs.htm;
                 http://dlib.computer.org/co/books/co1999/pdf/r4057.pdf",
  annote = "Describes the dangers which information systems are
  subject to and the traditional methods of preventing them (fault
  tolerance, database system management mechanisms). A realistic
  alternative to these two approaches is described that is a mixture
  of both, attacks and countermeasures are briefly described. While
  the exact mechanisms remain rather superficial, this paper is
  another example for the fact that security can also be seen as a
  fault tolerance problem (\cite{Arora:1998:DMF} is cited directly)
  with all the implications. See also \cite{Schneier:1998:CDV}."
}



@InProceedings{Jochim:1999:AGD,
  author    = {Markus Jochim},
  title     = {Automatic Generation of Diversified Program
               Variants Optimized to Detect Hardware Faults},
  booktitle = {Tenth European Workshop on Dependable Computing (EWDC-10)},
  year      = {1999},
  pages     = {169--174},
  address   = {Vienna, Austria},
  url       = {http://www.cs.uni-essen.de/Fachgebiete/Depend/Papers/Joch99/},
  annote    = {Presents ideas on how to automatically introduce
               code diversity into machine programs so that two distinct
               but semantically equivalent processes can run in parallel
               (virtual duplex system) and detect hardware errors with
               high probability. Discusses practical considerations in
               the design of code mutation rules like independence of
               addressing mode, overflow, short code production etc.}
}

@InProceedings{Johansen:1999:NAP,
  author    = {Dag Johansen and Keith Marzullo and Fred B. Schneider
               and Kjetil Jacobsen and Dmitrii Zagorodnov},
  title     = {{NAP}: Practical Fault-Tolerance for Itinerant
               Computations},
  booktitle = {Proceedings of the 19th IEEE International Conference
               on Distributed Computing Systems},
  editor    = {Mohamed G. Gouda},
  publisher = pub-IEEE,
  address   = {Austin, Texas},
  year      = {1999},
  month     = jun,
  pages     = {180--189}
}

@Article{Jutla:1999:MBS,
  author =       "Dawn Jutla and Peter Bodorik and Catherine Hajnal and
                 Charles Davis",
  title =        "Making Business Sense of Electronic Commerce",
  journal =      "Computer",
  volume =       "32",
  number =       "3",
  pages =        "67--75",
  month =        mar,
  year =         "1999",
  coden =        "CPTRB4",
  ISSN =         "0018-9162",
  bibdate =      "Sat Mar 6 09:04:10 MST 1999",
  url =          "http://www.computer.org/computer/co1999/r3067abs.htm;
                 http://dlib.computer.org/co/books/co1999/pdf/r3067.pdf",
  acknowledgement = ack-nhfb,
  annote =       "A good overview over the issues involved in adopting
                 and applying e-commerce in different fields of business.
                 Business models and application frameworks are presented."
}


@Article{Karaata:1999:SAB,
  author =       "Mehmet Hakan Karaata and Pranay Chaudhuri",
  title =        "A self-stabilizing algorithm for bridge finding",
  journal =      j-DC,
  volume =       "12",
  number =       "1",
  year =         "1999",
  pages =        "47--53",
  annote = "Finds edges which partition the graph if they are removed.
            Builds upon spanning tree algorithm by \cite{Huang:1992:SSA}."
}

@Article{Kelley:1999:HTB,
  author  = {Robert E. Kelley},
  title   = {How to be a star engineer},
  journal = {IEEE Spectrum},
  volume  = {36},
  number  = {10},
  pages   = {51--58},
  month   = oct,
  year    = {1999},
  annote  = {Reports on a study about engineer work performance
             and discusses many misconceptions. Argues that star
             performers are normal workers who are treated in
             a special way. keyword: Soft skills, also for managers.}
}

@MastersThesis{Kloppenburg:1999:EPS,
  author = {Sven Kloppenburg},
  title  = {Entdecken globaler {Pr\"adikate} in verteilten Systemen
            mit {Anhalteausf\"allen}},
  type   = {Diplomarbeit},
  school = {Technische Universit\"at Darmstadt, Fachbereich Informatik,
            Fachgebiet Betriebssysteme},
  month  = sep,
  year   = {1999},
  note   = {DA-BS-1999-02},
  annote = {Results published in \cite{Gaertner:2000:CDG}. A cite for
            the term ``Anhalteausfall'', German for ``crash''.}
}



@PhdThesis{Kulkarni:1999:CBD,
  author = {Sandeep S. Kulkarni},
  title  = {Component Based Design of Fault-Tolerance},
  school = {Department of Computer and Information Science, The Ohio
            State University},
  year   = {1999},
  annote = {Several papers contain results of this thesis, e.g.
            \cite{Arora:1998:CDM}.}
}

@InProceedings{Kulkarni:1999:CSC,
  author    = {Sandeep S. Kulkarni and John Rushby and Natarajan Shankar},
  title     = {A Case-Study in Component-Based Mechanical Verification
               of Fault-Tolerant Programs},
  booktitle = pro-wss99,
  editor    = {Anish Arora},
  publisher = pub-IEEE,
  address   = {Austin, TX, USA},
  month     = jun,
  year      = {1999},
  pages     = {33--40}
}


@Article{Lange:1999:SGR,
  author =       "Danny B. Lange and Mitsuru Oshima",
  title =        "Seven good reasons for mobile agents",
  journal =      "Communications of the ACM",
  volume =       "42",
  number =       "3",
  month =        mar,
  year =         "1999",
  pages =        "88--89",
  url =          "http://www.acm.org/pubs/articles/journals/cacm/1999-42-3/p88-lange/p88-lange.pdf",
  annote = "While the title states otherwise, the reasons presented
                 here are to me rather non-reasons: 1. they reduce the
                 network load, 2. they overcome network latency,
                 3. they encapsulate protocols, 4. they execute
                 asynchronously and autonomously, 5. they adapt
                 dynamically, 6. they are naturally heterogeneous,
                 7. they are robust and fault-tolerant. I find the way
                 in which the individual reasons are presented very
                 non-convincing, probably because the exposition is so
                 brief. Some applications of agents are given
                 (e-commerce, personal assistance, secure brokering,
                 distributed information retrieval, ...)."
}

@Article{Lewis:1999:BCM,
  author  = {Ted Lewis},
  title   = {Binary Critic: Mainframes Are Dead, Long Live
             Mainframes},
  journal = {Computer},
  year    = {1999},
  month   = aug,
  volume  = {32},
  number  = {8},
  pages   = {104, 102--103},
  url     = {http://dlib.computer.org/co/books/co1999/pdf/r8104.pdf},
  annote  = {Argues that mainframes are experiencing a revival because
             of their unmatched reliability. Gives some figures: Cost of
             downtime ranges from \$1000 per minute for simple e-mail to
             \$13000 per minute for enterprise resource planning
             applications. Also: An IBM S/390 sysplexed mainframe only
             has 10 minutes outage per year, while a windows-NT-based PC
             has about 224.5 hours outage per year (table 1).}
}



@TechReport{Mantel:1999:CSM,
  author =       {Heiko Mantel and Felix C. {G\"artner}},
  title =        {A case study in the mechanical verification of
                  fault tolerance},
  institution =  {Department of Computer Science, Darmstadt University
                  of Technology},
  year =         {1999},
  number =       {TUD-BS-1999-08},
  month =        nov
}



@InProceedings{Mostefaoui:1999:SCU,
  author =       {Achour Mostefaoui and Michel Raynal},
  title =        {Solving Consensus Using Chandra-Toueg's Unreliable
                  Failure Detectors: a General Quorum-Based Approach},
  booktitle =    {Proceedings of the 13th International Symposium on
                  Distributed Computing (DISC)},
  year =         {1999},
  volume =       {1693},
  series =       ser-LNCS,
  address =      {Bratislava, Slovak Republic},
  month =        sep,
  publisher =    pub-SV,
  annote =       {Uses dynamic quorums to define when a value may be
                  decided.}
}

@Article{Oberg:1999:WMP,
  author =       "James Oberg",
  title =        "Why the {Mars} Probe went off course",
  journal =      "IEEE Spectrum",
  volume =       "36",
  number =       "12",
  pages =        "34--39",
  month =        dec,
  year =         "1999",
  crindex =      "Journal",
  url =          "http://www.spectrum.ieee.org/spectrum/dec99/features/mars.html",
  annote = "A detailed report on why the Mars Climate Orbiter crashed onto
            the surface of Mars in 1999. Popularly believed to be only an
            error in taking metric and British measurement units, the
            article shows that the orbiter failed to follow the right
            trajectory also partly because of severe management mistakes
            and sensor inaccuracies:  Uncertainty led to assuming
            good things instead of bad things, so instead of a safe
            fly-by the orbiter must have crashed onto the surface of
            Mars (even that is not sure)."
}

@InProceedings{Pagnia:1999:EGP,
  author    = {Henning Pagnia and Holger Vogt},
  title     = {Exchanging goods and payment in electronic business
               transactions},
  booktitle = {Proceedings of the Third European Research Seminar on
               Advances in Distributed Systems (ERSADS)},
  address   = {Madeira Island, Portugal},
  month     = apr,
  year      = {1999},
  note      = {Proceedings distributed as copies at the conference.},
  annote    = {similar to \cite{Vogt:1999:FAE} but in English; I have
               an electronic copy in literature/pagnia-ersads.ps
               A similar protocol has appeared in \cite{Zhou:1996:FNP},
               a shorter presentation is \cite{Schneider:1998:FAN}.}
}



@TechReport{Pagnia:1999:IFE,
  author =       {Henning Pagnia and Felix C. {G\"artner}},
  title =        {On the impossibility of fair exchange without a trusted
                  third party},
  institution =  {Darmstadt University of Technology, Department of
                  Computer Science},
  year =         {1999},
  number =       {TUD-BS-1999-02},
  address =      {Darmstadt, Germany},
  month =        mar,
  url =          {http://www.informatik.tu-darmstadt.de/BS/Gaertner/publications/TUD-BS-1999-02.ps.gz},
  note =         {Available at \url{http://www.informatik.tu-darmstadt.de/BS/Gaertner/publications/TUD-BS-1999-02.ps.gz}. A substantially revised version is available
                  upon request from the authors.}
}

@InProceedings{Pedone:1999:GB,
  author =       "Fernando Pedone and Andr{\'e} Schiper",
  title =        "Generic Broadcast",
  booktitle =    "Proceedings of the 13th International Symposium
                 on Distributed Computing (DISC'99)",
  year =         "1999",
  volume =       "1693",
  series =       ser-LNCS,
  address =      "Bratislava, Slovak Republic",
  month =        sep,
  publisher =    pub-SV,
  url =          "http://lsewww.epfl.ch/Documents/acrobat/PS99c.pdf",
  annote = "see also \cite{Aguilera:2000:TGB} [to get]"
}


@Article{Reagle:1999:PPP,
  author =       "Joseph Reagle and Lorrie Faith Cranor",
  title =        "The platform for privacy preferences",
  journal =      "Communications of the ACM",
  volume =       "42",
  number =       "2",
  pages =        "48--55",
  month =        feb,
  year =         "1999",
  coden =        "CACMA2",
  ISSN =         "0001-0782",
  bibdate =      "Fri Feb 5 07:01:55 MST 1999",
  url =          "http://www.acm.org:80/pubs/citations/journals/cacm/1999-42-2/p48-reagle/",
  acknowledgement = ack-nhfb,
  annote = "PPP is a way of formally stating privacy policies within
  web pages and making privacy practices comparable and automatically
  manageable. Related work is the TRUSTe seal \cite{Benassi:1999:T}."
}



@Article{Reicherzer:1999:AUA,
  author  = {Judith Reicherzer},
  title   = {{Angeklickt und abgezockt}},
  journal = {Die Zeit},
  number  = {34},
  pages   = {20--21},
  month   = {19.~} # aug,
  year    = {1999},
  annote  = {Gute Motivation fuer die Notwendigkeit von Fair Exchange.}
}

@Article{Reiter:1999:AWT,
  author =       "Michael K. Reiter and Aviel D. Rubin",
  title =        "Anonymous {Web} transactions with crowds",
  journal =      "Communications of the ACM",
  volume =       "42",
  number =       "2",
  pages =        "32--48",
  month =        feb,
  year =         "1999",
  coden =        "CACMA2",
  ISSN =         "0001-0782",
  bibdate =      "Fri Feb 5 07:01:55 MST 1999",
  url =          "http://www.acm.org:80/pubs/citations/journals/cacm/1999-42-2/p32-reiter/",
  acknowledgement = ack-nhfb,
  annote = "One of the prominent projects to achieve anonymity on the
  web. The approach of crowds uses a nondeterministic forwarding
  service between clients within a crowd. A web server receiving a
  request cannot know whether the request originated from the sender
  or from some other member of the crowd. The concept can even provide
  privacy against a number of collaborating members of the crowds
  itself. Disadvantages of crowds are (among others) the increased
  retrieval latency, and having to protect the confidentiality of the
  message against other crowd members. Compared against the anonymizer
  for example, crowds has no single point where privacy can be
  compromised. Crowds has been implemented and deployed in the
  US. Some practical issues are also discussed and references to
  research papers are given. Other methods to achieve privacy are
  onion routing \cite{Goldschlag:1999:OR}, anonymizer
  (\url{www.anonymizer.com}), LPWA \cite{Gabber:1999:CYA}. Relevant
  other articles are \cite{Reagle:1999:PPP,Benassi:1999:T}."
}

@InCollection{Roth:1999:MPC,
  crossref  = {Vitek:1999:SIP},
  author    = {V. Roth},
  title     = {Mutual Protection of Co-operating Agents},
  booktitle = {Secure Internet Programming: Security Issues
               for Mobile and Distributed Objects},
  pages     = {277--287},
  annote    = {ref von Uwe Wilhelm}
}


@Misc{Semper:1999:ASA,
  key =          {SEMPER},
  editor =       {{SEMPER Consortium} and {IBM Z{\"u}rich}},
  title =        {Advanced Services, Architecture and Design},
  howpublished = {SEMPER Deliverable D10; La Gaude},
  month =        mar,
  year =         {1999},
  url =          {http://www.semper.org/deliver/d10/d10.ps.gz},
  note =         {Available at \url{http://www.semper.org/deliver/d10/d10.ps.gz}},
  annote =       {Part of the final report on the SEMPER project.}
}



@TechReport{Sergent:1999:FDI,
  author =       {Nicole Sergent and Xavier {D\'efago} and {Andr\'e} Schiper},
  title =        {Failure Detectors: implementation issues and impact on
                  consensus performance},
  institution =  {{\'Ecole} Polytechnique {F\'ed\'erale} de Lausanne,
                  Switzerland},
  year =         {1999},
  number =       {SSC/1999/019},
  annote = {This paper presents several different ways to implement
    crash failure detectors and measures the impact of these
    implementations on the performance of the Chandra Toueg Consensus
    algorithm \cite{Chandra:1996:UFD}. The different implementations
    are: heart beat (a node periodically sends `alive' messages),
    interrogation (nodes keep exchanging `are you alive', `alive'
    messages), and two optimizations: use only critical messages to do
    request response type failure detection, sending heart beats only
    between critical requests/responses. The simulation of the
    consensus algorithm shows that the time out used to implement
    suspicions together with the period interval of sending failure
    detector messages have optimal combinations regarding the
    termination time of the algorithm. It is argued that using
    failure detectors does not relieve the engineer from considering
    timing issues (also indicated by \cite{Fetzer:1999:CTA}).}
}



@InProceedings{Theel:1999:EPC,
  author    = {Oliver Theel and Felix C. {G\"artner}},
  title     = {An Exercise in Proving Convergence through Transfer
               Functions},
  booktitle = pro-wss99,
  editor    = {Anish Arora},
  publisher = pub-IEEE,
  address   = {Austin, TX},
  month     = jun,
  year      = {1999},
  pages     = {41--47},
  annote    = {A simpler example than in \cite{Theel:1998:OPS}, still
               not distributed, but from an algorithms viewpoint.}
}

@InProceedings{Theel:1999:OPT,
  author    = {Oliver Theel and Felix C. {G\"artner}},
  title     = {On proving termination through transfer functions},
  booktitle = {Proceedings of the 4th International Workshop on
               Termination},
  address   = {Dagstuhl, Germany},
  month     = may,
  year      = {1999}
}



@InCollection{Verissimo:1999:TDS,
  author =       {Paulo Ver{\'\i}ssimo and Michel Raynal},
  title =        {Time in distributed system models and algorithms},
  booktitle =    {Advances in Distributed Systems, Part I -- Distributed
                  Algorithms},
  publisher =    {ESPRIT Broadcast, Springer-Verlag},
  year =         {1999},
  note =         {to appear, available at \url{http://www.navigators.di.fc.ul.pt/archive/TimeBcast.ps.gz}},
  annote = {Summarizes a great deal of work already published
  elsewhere. First briefly sketches the controversy synchrony
  vs. asynchrony and states that today, timeliness constraints in
  terms of real-time are increasingly important, especially in
  dependable systems (flight control) or QoS applications. This leads
  to the quasi synchronous system model, which is then briefly
  elaborated on (for a more detailed explanation, see
  \cite{Almeida:1998:QSA}). Timing failure detectors (as
  generalizations of crash failure detectors \cite{Chandra:1996:UFD})
  are presented, motivated and implemented in the quasi synchronous
  setting. Timing failure detectors are complete in a safety sense
  (i.e., they detect timing failures within a known real-time
  bound). Such failure detectors can be generalized to QoS failure
  detectors. Then the CesiumSpray system for global clock
  synchronization is presented (a hierarchical and hybrid one to
  exploit the characteristics of different WAN/LAN settings), then
  follow some generalizations of causal or temporal precedence orders
  which also take events outside of the system into account and try to
  order them (I did not read that too carefully). Finally, some
  protocols to achieve such order are presented.}
}

@Book{Vitek:1999:SIP,
  editor =       "J. Vitek and C. Jensen",
  title =        "Secure Internet Programming: Security Issues
                  for Mobile and Distributed Objects",
  volume =       "1603",
  publisher =    pub-SV,
  address =      "New York, NY, USA",
  year =         "1999",
  series =       "Lecture Notes in Computer Science",
  keywords =     "Computer security; Electronic data processing ---
                 Distributed processing --- Security; Intelligent agents
                 (Computer software) --- Security measures;
                 Mobile agents (Computer software)",
}



@InProceedings{Vogt:1999:FAE,
  author =       {Holger Vogt and Henning Pagnia},
  title =        {{Fairer Austausch beim elektronischen Einkauf im Internet}},
  booktitle =    {Proceedings of the 6th DFN-CERT Workshop ``Sicherheit
                  in vernetzten Systemen''},
  year =         {1999},
  address =      {Hamburg, Germany},
  month =        mar,
  note =         {In German},
  annote =       {Ueberblick ueber Protokolle zum fairen Austausch von
     Ware gegen Geld in unsicheren Netzen wie dem Internet. Diskussion
     der Begriffe starker und schwacher Fairness von Asokan und
     der spaerlichen Literatur zu diesem Thema. Vorstellung einiger
     Protokolle zur starken Fairness mit Vermittlern: (1) mit aktivem
     Vermittler, (2) optimistisch mit generierbaren Waren, (3) optimistisch
     mit Zahlungswiderrufsmoeglichkeit, (4) optimistisch mit generierbarer
     Ware und Widerrufbarkeit. Am Ende Diskussion von Anonymitaet, die
     wenig Auswirkungen auf die vorgestellten Protokolle hat.}
}




@incollection{Wilhelm:1999:ITT,
  author =      {U. G. Wilhelm and S. Staamann and L. Butty{\'a}n},
  title =       {Introducing Trusted Third Parties to the Mobile Agent Paradigm},
  booktitle =   {Secure Internet Programming: Security Issues for Mobile and Distributed Objects},
  editor =      {J. Vitek and C. Jensen},
  series =      {Lecture Notes in Computer Science},
  volume =      {1603},
  publisher =   pub-SV,
  address =     {New York, NY, USA},
  year =        {1999},
  pages =       {471--491},
  keywords =    {Security},
  annote =      "[got it?]"
}

@PhdThesis{Wilhelm:1999:TAP,
  author  = "U. G. Wilhelm",
  title   = "A Technical Approach to Privacy based on
             Mobile Agents protected by Tamper-resistant Hardware",
  school  = "{\'E}cole Polytechnique F{\'e}d{\'e}rale de Lausanne",
  address = "Switzerland",
  number  = "1961",
  month   = may,
  year    = "1999"
}



@Article{Aguilera:2000:FDC,
  author   = {Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg},
  title    = {Failure Detection and Consensus in the Crash Recovery
              Model},
  journal  = {Distributed Computing},
  volume   = {13},
  number   = {2},
  pages    = {99--125},
  month    = apr,
  year     = {2000},
  url      = {http://link.springer.de/link/service/journals/00446/papers/0013002/00130099.pdf},
  alt-url  = {http://www.cs.cornell.edu/home/sam/FDpapers/crash-recovery-finaldcversion.ps},
  abstract = {We study the problems of failure detection and
              consensus in asynchronous systems in which processes
              may crash and recover, and links may lose messages. We
              first propose new failure detectors that are
              particularly suitable to the crash-recovery model. We
              next determine under what conditions stable storage is
              necessary to solve consensus in this model. Using the
              new failure detectors, we give two consensus algorithms
              that match these conditions: one requires stable
              storage and the other does not. Both algorithms
              tolerate link failures and are particularly efficient
              in the runs that are most likely in practice - those
              with no failures or failure detector mistakes. In such
              runs, consensus is achieved within $3 \delta$ time and
              with 4 n messages, where $\delta$ is the maximum
              message delay and n is the number of processes in the
              system.},
  annote   = {Description in \cite{Aguilera:1998:FDCTR}.}
}

%%% Journal article (SIAM Journal on Computing 29(6)) with a long
%%% personal content description in the annote field.
@Article{Aguilera:2000:QRC,
  author  = {Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg},
  title   = {On quiescent reliable communication},
  journal = {SIAM Journal on Computing},
  year    = {2000},
  volume  = {29},
  number  = {6},
  pages   = {2040--2073},
  month   = dec,
  url     = {\url{http://www.cs.cornell.edu/Info/People/sam/FDpapers/ACTquiescent-SIAM.ps}},
  annote  = {Quiescent algorithms are those that eventually stop
    sending messages. Quiescent reliable communication protocols are
    algorithms like reliable broadcast or uniform reliable broadcast
    that are quiescent. The authors study quiescent reliable communication
    algorithms in systems where processes may crash and links are fair.
    A link is fair if it does not introduce spurious messages and if
    a message which is sent infinitely often is received infinitely often.
    In such systems it is impossible to implement quiescent reliable
    communication without failure detectors. Why? Reliable communication
    means that whenever nodes $s$ and $r$ are correct and $s$ sends a
    message to $r$, then $r$ must eventually receive the message. However,
    $s$ must achieve this by sending only finitely many messages. Any
    number of messages may be lost due to the fair channels, and so 
    $s$ can never be sure whether $r$ has crashed or is alive if it does
    not receive an acknowledgement.  Failure detection can help in
    this case.  However, the usual failure detectors which output
    lists of suspects are not very useful. Any such bounded failure
    detector that helps solve quiescent reliable communication is
    at least as powerful as the eventually perfect failure detector.
    Why? The bound on the output of the failure detector implies that
    eventually it will keep on repeating the same (limit) values again
    and again. The existence of a quiescent communication primitive
    however implies that the limit value is in fact the set of correct
    processes. Thus, using this failure detector it is possible to
    emulate an eventually perfect failure detector. Next, the authors
    introduce a new type of failure detector called Heartbeat which
    has an unbounded output range. The range is a vector of elements
    (one for each process, or neighboring process) that keeps on
    increasing without bound as long as that process is alive. Thus,
    the failure detector can now be used to keep the system going.
    To achieve quiescence it is now possible to take a change in
    the heartbeat failure detector as the cause of a retransmission
    unless an acknowledgement has been received. In a sense, the
    decision whether to stop or not is transferred into the failure
    detector. Obviously, heartbeat is implementable in asynchronous
    systems (the authors give an implementation), and naturally,
    such an implementation cannot be quiescent. In systems where
    heartbeat is available quiescent reliable communication can be
    achieved and so fair links lose their danger: many algorithms that
    rely on reliable links can now be transformed into environments
    with lossy links (fair ones, not fair lossy \cite{Basu:1996:SRL}),
    whenever Heartbeat is available.  It must however be checked 
    whether reliable can be substituted with ``quasi-reliable''
    communication (quasi-reliable is equal to reliable if processes
    do not crash during quasi-reliable sending). The concluding 
    remarks touch some other interesting points: (1)
    message buffering can be limited by at some point excluding 
    suspected processes from the active group (i.e. explicitly 
    crashing them). The heartbeat implementation will however
    ensure that no messages are sent to them long before they are
    excluded. (2) a terminating protocol is quiescent, but a 
    quiescent protocol need not terminate. A layering technique
    is proposed that has failure detection as a basic mechanism
    (non-quiescent, non-terminating), building upon failure detection
    is reliable communication (quiescent, non-terminating), and
    on top can be terminating applications like consensus. (3)
    fair lossy \cite{Basu:1996:SRL} is opposed to fair channels,
    stating that the results also hold for fair lossy links, only
    that expensive piggybacking is required in this case. (4) failure
    detectors with finite output range have limitations (this is
    obvious from the fact that quiescent reliable communication
    needs an eventually perfect failure detector if the output range
    is bounded and such a detector is impossible to implement in
    asynchronous systems). However, when comparing failure detectors
    it is necessary to see whether the transformation is quiescent
    too. }
}



%%% Conference paper (DISC 2000, LNCS 1914) with content notes in annote.
@InProceedings{Aguilera:2000:TGB,
  author    = {Marcos Kawazoe Aguilera and Carole Delporte-Gallet and 
               Hugues Fauconnier and Sam Toueg},
  title     = {Thrifty generic broadcast},
  booktitle = {Proceedings of the 14th International Symposium on 
               Distributed Computing (DISC)},
  pages     = {268--282},
  year      = {2000},
  number    = {1914},
  series    = ser-LNCS,
  address   = {Toledo, Spain},
  month     = oct,
  publisher = pub-SV,
  annote    = {Looks at atomic broadcasts where the total order may
    be relaxed. Implementations of such operators can of course rely
    on atomic broadcast, but this is unsatisfactory. The strictness
    property proposed by Pedone and Schiper (generic broadcast) is
    not sufficient. In this paper, new definitions for a broadcast
    to be a good implementation of generic broadcast are proposed. 
    The definition is based on the notion of using an oracle like
    a failure detector. A generic broadcast implementation is good
    (=thrifty) if the implementation uses the oracle only when 
    conflicting messages need to be processed (a more formal definition
    is: if there is a time after which only non-conflicting messages
    are broadcast, then there is a time after which the oracle is
    not used anymore). The oracle used is in fact atomic broadcast.}
}

%%% Conference paper (PoDC 2000) with a short content description.
@InProceedings{Arora:2000:RVC,
  author    = {Anish Arora and Sandeep Kulkarni and Murat Demirbas},
  title     = {Resettable vector clocks},
  booktitle = {Proceedings of the Nineteenth Annual ACM
               Symposium on Principles of Distributed Computing (PoDC)},
  year      = {2000},
  pages     = {269--278},
  annote    = {Resettable vector clocks are vector clocks that use bounded
    state space. This paper identifies assumptions under which vector
    clocks may be replaced by resettable vector clocks in an application
    without endangering its correctness. Then resettable vector clocks
    are made stabilizing fault tolerant using detectors and correctors
    (a global reset is fired on local detection).}
}



%%% Encyclopedia contribution; empty template placeholders removed.
@InCollection{Arora:2000:S,
  author    = {Anish Arora},
  title     = {Stabilization},
  editor    = {Partha Dasgupta and Joseph E. Urban},
  booktitle = {Encyclopedia of Distributed Computing},
  publisher = {Kluwer},
  year      = {2000},
  url       = {\url{ftp://ftp.cis.ohio-state.edu/pub/anish/papers/stb.ps.gz}}
}

%%% Editorial of a German special issue; annote is written in German
%%% (umlauts transliterated, as in the original notes).
@Article{Bernhardt:2000:RDR,
  author  = {Ute Bernhardt},
  title   = {{Reiten auf der Risikowelle (Editorial zum Sonderheft
             zum Thema ``Verletzlichkeit der 
             Informationsgesellschaft'')}},
  journal = {FIfF Kommunikation},
  year    = {2000},
  number  = {3},
  pages   = {3},
  month   = sep,
  annote  = {Editorial zum Sonderheft. Im Sonderheft selbst sind
             ausnahmslos lesenswerte Artikel beispielsweise ueber 
             kritische Infrastrukturen \cite{Schulzki:2000:KI}, 
             Cybercrime, Jugendschutz im 
             Internet und Vertrauen. Interessant ist, dass etwa
             zur selben Zeit eine thematisch aehnliche Ausgabe von
             IEEE Computer erscheint \cite{Jones:2000:CBS}.}
}


%%% Conference paper; booktitle and publisher come from string macros
%%% defined outside this part of the file.
@InProceedings{Boichat:2000:RBC,
  author    = {Romain Boichat and Rachid Guerraoui},
  title     = {Reliable Broadcast in the Crash-Recovery Model},
  booktitle = pro-srds2000,
  publisher = pub-IEEE,
  address   = {N\"urnberg, Germany},
  month     = oct,
  year      = {2000}
}



%%% Journal article; volume/number/pages/month/annote are active fields
%%% (an OPT prefix would make BibTeX ignore them).
@Article{Bowen:2000:ESC,
  author  = {Jonathan Bowen},
  title   = {The ethics of safety-critical systems},
  journal = j-CACM,
  year    = {2000},
  volume  = {43},
  number  = {4},
  pages   = {91--97},
  month   = apr,
  annote  = {Presents sins and truths of safety critical systems
             engineering. Explicitly discusses formal methods.}
}



%%% IRISA technical report; annote is still a reading reminder.
@TechReport{Brasileiro:2000:COC,
  author      = {Francisco Brasileiro and {Fab\'\i{}ola} Greve and Achour
                 {Most\'efaoui} and Michel Raynal},
  title       = {Consensus in one communication step},
  institution = {IRISA},
  address     = {Rennes, France},
  number      = {PI-1321},
  year        = {2000},
  annote      = {[to read]}
}

%%% Conference paper (FTRTFT 2000, LNCS 1926).
@InProceedings{Breitling:2000:MFD,
  author    = {Max Breitling},
  title     = {Modeling faults of distributed, reactive systems},
  editor    = {Mathai Joseph},
  booktitle = {Formal Techniques in Real-Time and Fault-Tolerant 
               Systems, 6th International Symposium (FTRTFT 2000) 
               Proceedings},
  series    = ser-LNCS,
  number    = {1926},
  publisher = pub-SV,
  address   = {Pune, India},
  month     = sep,
  year      = {2000},
  pages     = {58--69},
  annote    = {Models faults as addition of variables and transitions in
               a special formalism (Fokus) which supports compositionality
               and refinement.}
}


%%% German magazine article; annote is written in German.
@Article{Buschek:2000:M4W,
  author  = {Oliver Buschek},
  title   = {{Mit dem 486er zur Raumstation}},
  journal = {Chip},
  month   = feb,
  year    = {2000},
  pages   = {92--98},
  annote  = {Leicht verstaendlicher Einstieg in Themen der Fehlertoleranz
             im Weltraum. Fokus auf ISS: Dort sind 6fach redundante
             Schuhschachteln, die Byzantinisches Agreement machen, drin.
             Gibt auch Hinweise auf Webadressen der Nasa und ESA.}
}



%%% Project deliverable; the note field cites the project entry.
@Misc{Cachin:2000:RMU,
  author = {C. Cachin and J. Camenisch and M. Dacier and Y. Deswarte
            and J. Dobson and D. Horne and K. Kursawe and J.-C. Laprie
            and J.-C. Lebraud and D. Long and T. McCutcheon and 
            J. {M\"uller} and F. Petzold and B. Pfitzmann and D. Powell
            and B. Randell and M. Schunter and V. Shoup and 
            P. Ver{\'\i}ssimo and G. Trouessin and R. J. Stroud and
            M. Waidner and I. S. Welch},
  title  = {Reference Model and Use Cases},
  year   = 2000,
  month  = aug,
  note   = {Deliverable D1 of the MAFTIA project \cite{MAFTIA}.}
}

%%% Conference paper; proper nouns in the title are brace-protected so
%%% that sentence-casing styles keep their capitals.
@InProceedings{Cachin:2000:ROC,
  author    = {Christian Cachin and Klaus Kursawe and Victor Shoup},
  title     = {Random oracles in {Constantinople}: practical asynchronous
               {Byzantine} agreement using cryptography},
  booktitle = {Proceedings of the Symposium on Principles of Distributed
               Computing},
  pages     = {123--132},
  year      = {2000},
  address   = {Portland, Oregon},
  annote    = {Looks at randomized Byzantine agreement and presents an
     optimistic solution using a randomized and cryptographically secure
     coin toss. This is a good example on what and where fault-tolerance
     can learn from cryptography.}
}

%%% Conference paper (CONCUR 2000, LNCS 1877) with detailed content notes.
@InProceedings{Charron-Bost:2000:RSL,
  author    = {Bernadette Charron-Bost and Sam Toueg and Anindya Basu},
  title     = {Revisiting safety and liveness in the context of failures},
  editor    = {C. Palamidessi},
  booktitle = {Proceedings of CONCUR2000 -- Concurrency Theory, 
               11th Int. Conference},
  series    = ser-LNCS,
  number    = {1877},
  publisher = pub-SV,
  address   = {University Park, PA},
  month     = aug,
  year      = {2000},
  pages     = {552--565},
  annote    = {Agreement in consensus is defined as ``no two correct
        processes decide differently''. Against common belief, this is
        a liveness property in systems where processes may crash. This
        is because if two processes have decided differently, then
        agreement can still be achieved if one of them crashes. The
        authors define pure safety and pure liveness meaning that
        safety and liveness hold without ``the help or non-help of
        failures''. Pure liveness means that something good can still
        happen without the help of failures. Pure safety means that
        executions which do not satisfy the property must contain
        failures to satisfy the property. Pure versions are strictly
        weaker than the original versions. The authors define a
        property transformer `Pure' that `makes a property pure' by
        removing all executions which contain undesirable partial
        runs. Pure agreement demands that no two alive processes
        decide differently and comes closer to our intuition of
        agreement in consensus. Pure agreement is stronger than
        uniform agreement but weaker than agreement. Shows that every
        pure property is the intersection of a pure safety and a pure
        liveness property.  A startling paper which demands more
        investigation!}
}

%%% Conference paper with content notes in the annote field.
@InProceedings{Charron-Bost:2000:SSP,
  author    = {Bernadette Charron-Bost and Rachid Guerraoui and 
               {Andr\'e} Schiper},
  title     = {Synchronous System and Perfect Failure Detector:
               Solvability and Efficiency Issues},
  booktitle = {International Conference on Dependable Systems and
               Networks (IEEE Computer Society)},
  year      = {2000},
  annote    = {Looks at the relation between the synchronous system model 
    and the asynchronous model augmented with perfect failure detectors.
    They show that there are problems which are solvable in synchronous
    systems but are unsolvable in asynchronous systems with perfect failure
    detectors. Hence, both models are not equivalent in this respect. 
    This is because failure detectors give no information on the
    causal relation between the crash event and other events on the
    crashed process. This means that you cannot decide whether there
    is still a message in transit coming from the crashed process
    or not. If you want to base a decision on this fact you have 
    the same dilemma as in FLP \cite{Fischer:1985:IDC}. But if it
    comes to consensus, both models are ok because you can solve
    this problem in both. However, in synchronous systems algorithms
    can be constructed with a lower latency degree \cite{Schiper:1997:ECA}
    so more efficient solutions are possible in the synchronous model.}
}




%%% Conference paper (DSN 2000) with content notes in the annote field.
@InProceedings{Chen:2000:QOS,
  author    = {Wei Chen and Sam Toueg and Marcos Kawazoe Aguilera},
  title     = {On the quality of service of failure detectors},
  booktitle = {Proceedings of the International Conference on 
               Dependable Systems and Networks (DSN 2000)},
  year      = {2000},
  address   = {New York},
  month     = jun,
  publisher = pub-IEEE,
  annote    = {In a system model where message delays and losses follow
    some probability distribution the authors study performance metrics
    regarding the accuracy and completeness of the failure detectors
    which were introduced in the time free model \cite{Chandra:1996:UFD}.
    Metrics concerning completeness are the detection time (i.e. the
    time between the crash and the detection of the crash). Metrics
    concerning accuracy are mistake recurrence time (the time between
    two successive mistakes) and mistake duration (the time it takes
    to correct a mistake). Other accuracy metrics can be derived from
    them (average mistake rate, query accuracy probability, good period
    duration, forward good period duration). An algorithm is presented
    which achieves optimality concerning some metrics and is based
    on synchronized clocks: a timeout is started not when a heartbeat
    arrives but at certain freshness points which are at equal intervals
    at both processes (with a message delay difference). Discusses 
    how to tune the parameters of the algorithm to perform nearly optimal
    and presents some ideas concerning adaptivity. Gives an overview
    over other failure detection approaches in the literature.}
}

%%% Magazine article (IEEE Spectrum 37(10)) with a short content note.
@Article{Crawford:2000:BNP,
  author  = {Gregory P. Crawford},
  title   = {A bright new page in portable displays},
  journal = {IEEE Spectrum},
  year    = {2000},
  volume  = {37},
  number  = {10},
  pages   = {40--46},
  month   = oct,
  annote  = {Gives insight in new display technology aka smart paper.
             Presents some fascinating photos of a cholesteric LCD
             display of Kent Displays Inc., Kent, Ohio, which is reflective
             (needs no back light) and does not need power to hold 
             the image.  Also describes the technologies behind this
             display and Gyricon (Xerox) and E ink.}
}




%%% PhD thesis; the thesis number is recorded in the note field.
@PhdThesis{Defago:2000:ARP,
  author  = {Xavier {D\'efago}},
  title   = {Agreement-related problems: from semi-passive replication
             to totally ordered broadcast},
  school  = {{\'Ecole Polytechnique F\'ed\'erale de Lausanne}},
  address = {Lausanne, Switzerland},
  year    = {2000},
  note    = {Thesis number 2229}
}

%%% Magazine article (IEEE Spectrum 37(10)) with a short content note.
@Article{Ditlea:2000:PCG,
  author  = {Steve Ditlea},
  title   = {The {PC} goes ready-to-wear},
  journal = {IEEE Spectrum},
  year    = {2000},
  volume  = {37},
  number  = {10},
  pages   = {34--39},
  month   = oct,
  annote  = {This is more a market survey of wearables, presenting
             display technology, prototypes (Xybernaut, IBM etc)
             and e.g. Twiddler chorded keyboard. For a visionary
             article see \cite{Billinghurst:1999:WDN}. }
}



%%% Book entry; empty template placeholders removed.
@Book{Dolev:2000:SS,
  author    = {Shlomi Dolev},
  title     = {Self-Stabilization},
  publisher = {MIT Press},
  year      = {2000}
}

%%% Conference paper (IPDS2K); annote refers to related entries.
@InProceedings{Echtle:2000:FFM,
  author    = {Klaus Echtle and Asif Masum},
  title     = {A fundamental failure model for fault-tolerant protocols},
  booktitle = {Proceedings of the IEEE International Computer 
               Performance and Dependability Symposium (IPDS2K)},
  publisher = pub-IEEE,
  address   = {Chicago, IL},
  year      = {2000},
  pages     = {69--78},
  annote    = {See also \cite{Echtle:1999:UCB,Masum:2000:NCB}. A
    more elaborate description is attached to the entry of 
    \cite{Echtle:1999:UCB}.}
}


%%% Conference paper; the accented surname uses the braced
%%% special-character form so BibTeX treats the accent as one letter.
@InProceedings{Gaertner:2000:CDG,
  author    = {Felix C. G{\"a}rtner and Sven Kloppenburg},
  title     = {Consistent Detection of Global Predicates Under a 
               Weak Fault Assumption},
  booktitle = pro-srds2000,
  pages     = {94--103},
  year      = {2000},
  address   = {N\"urnberg, Germany},
  month     = oct,
  publisher = pub-IEEE
}



%%% Technical report TUD-BS-2000-06 with download URL.
@TechReport{Gaertner:2000:RIS,
  author      = {Felix C. {G\"artner} and Hagen {V\"olzer}},
  title       = {Redundancy in space in fault-tolerant systems},
  institution = {Department of Computer Science,
                 Darmstadt University of Technology},
  address     = {Darmstadt, Germany},
  number      = {TUD-BS-2000-06},
  month       = jul,
  year        = {2000},
  url         = {\url{http://www.informatik.tu-darmstadt.de/BS/Gaertner/publications/TUD-BS-2000-06.ps.gz}}
}




%%% Conference paper (DSN 2000); booktitle spelled like the other
%%% DSN 2000 entry in this file.
@InProceedings{Hiller:2000:EAD,
  author    = {Martin Hiller},
  title     = {Executable assertions for detecting data errors in 
               embedded control systems},
  booktitle = {Proceedings of the International Conference on Dependable
               Systems and Networks (DSN 2000)},
  pages     = {24--33},
  year      = {2000}
}


%%% Workshop paper (LNCS 1800). Further editors are expressed with the
%%% BibTeX token "and others" so the style renders the "et al." itself.
@InProceedings{Huang:2000:TFP,
  author    = {Shing-Tsaan Huang},
  title     = {The fuzzy philosophers},
  booktitle = {Proceedings of the 15th IPDPS 2000 Workshops},
  pages     = {130--136},
  year      = {2000},
  editor    = {J. Rolim and others},
  volume    = {1800},
  series    = ser-LNCS,
  address   = {Cancun, Mexico},
  month     = may,
  publisher = pub-SV,
  annote    = {Generalization of the dining philosophers and a 
               self-stabilizing solution.}
}

%%% German journal article; annote is written in German.
@Article{Hutter:2000:AII,
  author  = {Reinhard Hutter},
  title   = {{Angriffe auf Informationstechnik und Infrastrukturen --
             Realit\"at oder Science Fiction?}},
  journal = {Aus Politik und Zeitgeschichte},
  volume  = {41--42},
  pages   = {31--38},
  year    = {2000},
  annote  = {Gute Einfuehrung in und Referenz zu kritischen 
             Infrastrukturen, eher aus allgemeinverstaendlicher
             und politikwissenschaftlicher Sicht.}
}


%%% Special-issue introduction (IEEE Computer 33(8)); annote
%%% cross-references the German counterpart entry.
@Article{Jones:2000:CBS,
  author  = {Anita Jones},
  title   = {The challenge of building survivable information-intensive
             systems (introduction to special issue on ``critical 
             infrastructures'')},
  journal = {IEEE Computer},
  volume  = {33},
  number  = {8},
  pages   = {39--43},
  month   = aug,
  year    = {2000},
  annote  = {A German Journal with similar directions appeared at about
             the same time \cite{Bernhardt:2000:RDR}.}
}


%%% Conference paper (LNCS 1882) with keywords and abstract; booktitle,
%%% series and publisher come from string macros.
@InProceedings{Karjoth:2000:SMA,
  author    = {G\"{u}nter Karjoth},
  title     = {Secure Mobile Agent-Based Merchant Brokering in
               Distributed Marketplaces},
  booktitle = asama2000,
  series    = ser-LNCS,
  volume    = {1882},
  publisher = pub-SV,
  address   = {Zurich, Switzerland},
  month     = sep,
  year      = 2000,
  pages     = {44--56},
  keyword   = {agents, e-commerce, security, mobile agent},
  abstract  = {Cooperating merchants establish a distributed
               marketplace under the auspices of an independent
               market authority. Each merchant's server is equipped
               with a trusted device, a smart card for example,
               provided by the market authority. The market
               authority plays the role of a trusted third party
               for the customer as well as for the merchants. This
               paper describes protocols that prevent the malicious
               alteration of the data collected by visiting mobile
               agents roaming through the marketplace without being
               detectable by subsequent servers or by the owner of
               the agent upon its return. Another protocol makes
               the trusted device a secure execution platform for
               routines provided by the agent owner. }
}


%%% German survey article (Informatik Spektrum 23(3)) with a short note.
@Article{Kehr:2000:SV,
  author  = {Roger Kehr},
  title   = {Spontane {Vernetzung}},
  journal = {Informatik Spektrum},
  year    = {2000},
  volume  = {23},
  number  = {3},
  pages   = {161--172},
  month   = jun,
  annote  = {Good survey of the three main methods for spontaneous
             networking (Jini, SLP, UPnP). Also discusses Bluetooth.
             Good starting point for German readers.}
}

%%% Technical report TUD-BS-2000-01; the URL points to the abstract page.
@TechReport{Kloppenburg:2000:CDG,
  author      = {Sven Kloppenburg and Felix C. {G\"artner}},
  title       = {Consistent detection of global predicates in 
                 asynchronous systems with crash failures},
  institution = {Darmstadt University of Technology, Department of
                 Computer Science},
  address     = {Darmstadt, Germany},
  number      = {TUD-BS-2000-01},
  month       = feb,
  year        = {2000},
  url         = {\url{http://www.informatik.tu-darmstadt.de/BS/Gaertner/publications/TUD-BS-2000-01.abstract.html}}
}



%%% Conference paper (FTRTFT 2000, LNCS 1926) with content notes.
@InProceedings{Kulkarni:2000:AAF,
  author    = {Sandeep S. Kulkarni and Anish Arora},
  title     = {Automating the addition of fault-tolerance},
  editor    = {Mathai Joseph},
  booktitle = {Formal Techniques in Real-Time and Fault-Tolerant 
               Systems, 6th International Symposium (FTRTFT 2000) 
               Proceedings},
  series    = ser-LNCS,
  number    = {1926},
  publisher = pub-SV,
  address   = {Pune, India},
  month     = sep,
  year      = {2000},
  pages     = {82--93},
  annote    = {Looks at automatically adding detectors and correctors
               in the sense of \cite{Arora:1998:DCT} to existing programs.
               Specifications are fusion- and suffix-closed, giving
               ``bad'' transitions which violate safety. Idea of fail-safe
               fault-tolerance is to cut away all paths leading to 
               these bad transitions.  This must be possible without
               changing the original behavior. Non-masking fault-tolerance
               is achieved by adding transitions from all states outside 
               of the invariant to states within. Masking fault-tolerance
               is somewhat more complex. States that adding fault-tolerance
               is NP-complete but refers the proof to a TR. From a
               conceptual point of view is similar to 
               \cite{Gaertner:2000:RIS}.  An important point in the
               transformation is that the fault-tolerant version must
               not contain ``new'' ways to satisfy the specification.}
}

%%% Magazine column (IEEE Spectrum 37(6)) with an incident summary.
@Article{Kumagai:2000:LEV,
  author  = {Jean Kumagai},
  title   = {Faults \& failures: {London} stock exchange vanishes
             for 8 hours},
  journal = {IEEE Spectrum},
  year    = {2000},
  volume  = {37},
  number  = {6},
  pages   = {30--31},
  month   = jun,
  annote  = {Sketches the 8 hour blackout of the London stock
             exchange (LSE) on April 5, 2000. Slow overnight batch jobs
             had caused old prices to get mixed up with new prices.
             Frantic calls from traders persuaded the LSE to delay
             trading until the problem was fixed (trading is useless
             with wrong prices). This delay lasted 8 hours. The
             reason for the slow batch job was an inherent program
             inefficiency combined with an unusually high volume
             of data. Fixing required rewriting a couple of lines
             of code --- ``absolutely trivial''. Costs are 
             estimated in the millions of pounds.}
}


%%% Journal article (Distributed Computing 13(4)); the annote quotes a
%%% passage on liveness properties.
@Article{Lamport:2000:FAH,
  author   = {Leslie Lamport},
  title    = {Fairness and hyperfairness},
  journal  = {Distributed Computing},
  volume   = {13},
  number   = {4},
  pages    = {239--245},
  year     = {2000},
  url      = {http://link.springer.de/link/service/journals/00446/papers/0013004/00130239.pdf},
  abstract = {The notion of fairness in trace-based formalisms is 
              examined. It is argued that, in general, fairness means
              machine closure. The notion of hyperfairness introduced
              by Attie, Francez, and Grumberg is generalized to
              arbitrary action systems. Also examined are the
              fairness criteria proposed by Apt, Francez, and Katz.},
  annote   = {There's a good quote here about reasoning about liveness
    properties: ``Fairness conditions are a way of expressing
    liveness properties, and liveness properties are inherently
    problematic. The question of whether a real system satisfies a
    liveness property is meaningless; it can be answered only by
    observing the system for an infinite length of time, and real
    systems don't run forever. Liveness is always an approximation to
    the property we really care about. We want a program to terminate
    within 100 years, but proving that it does would require addition
    of distracting timing assumptions. So, we prove the weaker
    condition that the program eventually terminates. This doesn't
    prove that the program will terminate within our lifetimes, but it
    does demonstrate the absence of infinite loops.''  This is a
    must-read paper for people interested in liveness issues.}
}

%%% Conference paper (FTRTFT 2000, LNCS 1926) with a short content note.
@InProceedings{Lano:2000:IBS,
  author    = {K. Lano and David Clark and K. Androutsopoulos and P. Kan},
  title     = {Invariant-based synthesis of fault-tolerant systems},
  editor    = {Mathai Joseph},
  booktitle = {Formal Techniques in Real-Time and Fault-Tolerant 
               Systems, 6th International Symposium (FTRTFT 2000) 
               Proceedings},
  series    = ser-LNCS,
  number    = {1926},
  publisher = pub-SV,
  address   = {Pune, India},
  month     = sep,
  year      = {2000},
  pages     = {46--57},
  annote    = {Uses a precise formal semantics of statecharts to 
     compositionally develop and verify systems. Presents a fault-tolerant
     production cell as a case study.}
}


@TechReport{Larrea:2000:ECF,
  author      = {Mikel Larrea and Antonio {Fern\'andez} and
                 Sergio {Ar\'evalo}},
  title       = {Eventually consistent failure detectors},
  institution = {Universidad {P\'ublica} de Navarra, Spain},
  year        = {2000},
  month       = apr,
  note        = {Presented as a brief announcement at DISC2000},
  url         = "\url{http://www.gsd.unavarra.es/pres/miembros/mikel/consistent.ps}",
  annote      = {A new class of failure detectors is presented called
    `eventually consistent'. The weak accuracy property is enriched by
    a function with which processes can identify the `common one'
    process which is not wrongly suspected. This can be seen as a type
    of leader election capability. Eventually consistent failure detectors
    lie between eventually perfect and eventually strong ones. The additional
    information offered by this failure detector allows more efficient
    consensus algorithms. Since everybody eventually focusses on one
    and the same process as a coordinator, consensus algorithms are possible
    which do not rely on the rotating coordinator paradigm.  This is part
    of Mikel's PhD research (see also \cite{Larrea:2000:OIW}).}
}

@inproceedings{Larrea:2000:OIW,
  author    = {Mikel Larrea and Antonio Fern\'andez and
               Sergio Ar\'evalo},
  title     = {Optimal Implementation of the Weakest Failure Detector
               for Solving Consensus},
  booktitle = pro-srds2000,
  publisher = pub-IEEE,
  address   = {N\"urnberg, Germany},
  month     = oct,
  year      = {2000}
}






@inproceedings{Mantel:2000:CSM,
  author    = {Heiko Mantel and Felix C. {G\"artner}},
  title     = {A case study in the
               mechanical verification of fault tolerance},
  booktitle = {Proceedings of the 13th
               International Florida Artificial Intelligence
               Conference (FLAIRS-2000)},
  address   = {Orlando, FL},
  month     = may,
  year      = {2000},
  annote    = {Preliminary version available as TR \cite{Mantel:1999:CSM}.}
}



@article{Mantel:2000:ACS,
  author  = {Heiko Mantel and Felix C. {G\"artner}},
  title   = {A case study in the
             mechanical verification of fault tolerance},
  journal = {Journal of Experimental \& Theoretical Artificial
             Intelligence (JETAI)},
  volume  = {12},
  number  = {4},
  pages   = {473--488},
  month   = oct,
  year    = {2000}
}



@phdthesis{Masum:2000:NCB,
  author = {Asif Masum},
  title  = {Non-cooperative {Byzantine} failures: {A} new framework
            for the design of efficient fault tolerance protocols},
  school = {Universit\"at-Gesamthochschule Essen, Fachbereich Mathematik
            und Informatik},
  year   = {2000},
  note   = {Published by Libri Books on demand, ISBN 3-8311-0815-3.},
  annote = {Conference version e.g. \cite{Echtle:1999:UCB}. Good
            overview over failure classification schemes.}
}


@Article{Matsui:2000:FTS,
  author    = {H. Matsui and M. Inoue and T. Masuzawa and H. Fujiwara},
  title     = {Fault-Tolerant and Self-Stabilizing Protocols Using an
               Unreliable Failure Detector},
  journal   = {IEICE Transactions},
  publisher = {Institute of Electronics, Information and
               Communication Engineers},
  volume    = {E83-D},
  number    = {10},
  pages     = {1831--1840},
  month     = oct,
  year      = {2000},
  keywords  = {distributed algorithms;
    self-stabilization; fault-tolerance; failure detector; x-group
    consensus},
  abstract  = {We investigate possibility of fault-tolerant and
    self-stabilizing protocols (ftss protocols) using an unreliable
    failure detector. Our main contribution is (1) to newly introduce
    k-accuracy of an unreliable failure detector, (2) to show that
    k-accuracy of a failure detector is necessary for any ftss k-group
    consensus protocol, and (3) to present three ftss k-group
    consensus protocols using a k-accurate and weakly complete failure
    detector under the read/write daemon on complete networks and on
    (n-k+1)-connected networks, and under the central daemon on
    complete networks.},
  annote    = {The term $k$-accuracy means that at least $k$ correct
    processes will not be wrongly suspected by the failure
    detector. $k=1$ is the same as weak accuracy while $k=n-t$ is the
    same as strong accuracy. (See also the eventual consistency
    definition of \cite{Larrea:2000:ECF}.) In a $k$-group consensus
    protocol all correct processes must eventually choose the same
    group of $k$ processes. This looks something like self-stabilizing
    $k$ leader election.}
}

@InProceedings{Mittal:2000:DDP,
  author =       "Neeraj Mittal and Vijay K. Garg",
  title =        "Debugging Distributed Programs Using Controlled
                 Re-execution",
  pages =        "239--248",
  booktitle =    "Proceedings of the 19th Annual {ACM} Symposium on
                 Principles of Distributed Computing ({PODC}-00)",
  month =        jul # " ~16--19",
  publisher =    "ACM Press",
  address =      "NY",
  year =         "2000",
  annote = "Controlled re-execution means to execute a distributed program
    so that a given safety property is maintained during that execution.
    The authors identify a class of predicates for which this can be
    done efficiently, i.e. without much synchronization. There are some
    resemblances here to Schneider's enforceable security policies
    \cite{Schneider:2000:ESP}."
}

@inproceedings{Mostefaoui:2000:KSA,
  author    = {Achour {Most\'efaoui} and Michel Raynal},
  title     = {{\it{k}}-Set Agreement with Limited Accuracy Failure
               Detectors},
  booktitle = {Proceedings of the 19th Annual {ACM} Symposium on
               Principles of Distributed Computing ({PODC}-00)},
  pages     = {143--152},
  publisher = {ACM Press},
  address   = {NY},
  month     = jul # " ~16--19",
  year      = {2000},
  annote    = {Looks at the $k$-set agreement of \cite{Chaudhuri:1990:AHC}
    and shows the possibility and impossibility of solving it under
    different assumptions which include a failure detector with limited
    scope. Informally, the scope of the accuracy property is the number
    of processes that may not suspect a correct process.}
}

@inproceedings{Namjoshi:2000:CCR,
  author    = {Kedar S. Namjoshi and Richard J. Trefler},
  title     = {On the completeness of compositional reasoning},
  booktitle = {Proceedings of the 12th Int. Conference on
               Computer Aided Verification (CAV2000)},
  series    = ser-LNCS,
  number    = {1855},
  pages     = {139--153},
  publisher = pub-SV,
  month     = jul,
  year      = {2000},
  annote    = {Gives examples of non-circular compositional reasoning,
     unlike \cite{Abadi:1993:CS} which is also shown to be incomplete.}
}



@Article{Oberg:2000:NBP,
  author  = {James Oberg},
  title   = {{NASA's} big push for the space station},
  journal = {IEEE Spectrum},
  year    = {2000},
  volume  = {37},
  number  = {11},
  pages   = {49--54},
  month   = nov,
  annote  = {Describes problems and workarounds while deploying the
    new space station ISS. States that the software on the ISS is far from
    well tested because of the tight schedule. Cite: ``We launched the
    Space Shuttle when we were 90 percent ready, but we're launching
    Space Station at only 50 percent.'' An example of planning flaws is
    the construction of Plasma Contact Units (PCU): because the ISS runs
    130-180 V power (instead of 24-28 V in earlier designs) and orbits
    in thin plasma,  a voltage threshold for arcing (which is at about
    40 to 60 Vdc) is surpassed by the outer skin of the spacecraft which
    endangers solar cells and outboard equipment and causes hazards
    for astronauts on space walks. Two PCUs were added to the design
    which are ion beams constantly shooting ions into space to decrease
    the electric potential. If one PCU breaks down, the other can still
    relieve the potential, but to fix a broken PCU a spacewalk is
    required! (The procedures now are to shut down part of the ISS
    in this situation and only run 24-28 V during repair.) Astronauts
    use IBM 760 Thinkpad laptop computers on board!  Shows that it is
    good to still rely on heavy duty experienced technology like
    Mir and Sojus.}
}

@inproceedings{Pagnia:2000:SFE,
  author    = {Henning Pagnia and Holger Vogt and Felix
               C. G\"artner and Uwe G. Wilhelm},
  title     = {Solving Fair Exchange with Mobile Agents},
  booktitle = asama2000,
  series    = ser-LNCS,
  volume    = {1882},
  pages     = {57--72},
  publisher = pub-SV,
  address   = {Zurich, Switzerland},
  month     = sep,
  year      = 2000,
  keyword   = {mobile agent, e-commerce, security},
  abstract  = { Mobile agents have been advocated to support
                electronic commerce over the Internet. While being a
                promising paradigm, many intricate problems need to
                be solved to make this vision reality. The problem
                of \emph{fair exchange} between two agents is one
                such fundamental problem. Informally speaking, this
                means to exchange two electronic items in such a way
                that neither agent suffers a disadvantage. We study
                the problem of fair exchange in the mobile agent
                paradigm. We show that while existing protocols for
                fair exchange can be substantially simplified in the
                context of mobile agents, there are still many
                problems related to security which remain difficult
                to solve. We propose three increasingly flexible
                solutions to the fair exchange problem and show how
                to implement them using existing agent
                technology. The basis for ensuring the security
                properties of fair exchange is a tamper-proof
                hardware device called a trusted processing
                environment. }
}



@article{Perry:2000:DAR,
  author  = {Tekla S. Perry},
  title   = {faults \& failures: Does anybody really know what time
             it is?},
  journal = {IEEE Spectrum},
  volume  = {37},
  number  = {10},
  pages   = {26--28},
  month   = oct,
  year    = {2000},
  annote  = {Another amusing story in this regular column: studies
             the reasons behind the problem of VCRs not adjusting
             to the right time. For much of 1999, video cassette
             recorders (VCRs) around the U.S. were showing the wrong
             time. It affected only machines which had an automatic
             time adjuster builtin. This adapter reads the time
             which is broadcasted as part of the public broadcasting
             service (PBS, in German it's Videotext) and adjusts the
             VCRs clock to it. The reason for this fault was an
             incorrect time broadcasted by some PBS stations and it
             took months to locate it. This was due to hardly any
             user response (an article of a journalist triggered
             a wide response after months) and due to PBS providers
             not knowing how to set the broadcasted time correctly.}
}

@inproceedings{Pleisch:2000:MFT,
  author    = {Stefan Pleisch and {Andr\'e} Schiper},
  title     = {Modeling fault-tolerant mobile agent execution as a
               sequence of agreement problems},
  booktitle = pro-srds2000,
  pages     = {11--20},
  publisher = pub-IEEE,
  address   = {N\"urnberg, Germany},
  month     = oct,
  year      = {2000}
}


@techreport{Prasetya:2000:FFT,
  author      = {I. S. W. B. Prasetya and S. D. Swierstra},
  title       = {Factorizing Fault Tolerance},
  institution = {University of Utrecht, Department of Computer Science},
  number      = {UU-CS-2000-02},
  address     = {Utrecht, The Netherlands},
  year        = {2000},
  note        = {Appears in special issue of TCS on fault tolerance},
  annote      = {This is the paper which first introduced me to
                 the issue of composition of liveness properties.
                 The paper proposes a composition law which is based
                 on the notion of `temporal non-interference'. This
                 means the following: Given a component $P$ which
                 satisfies $p\leadsto q$ and a component $Q$ which
                 does not interfere with $P$'s progress as long as
                 some flag $a$ is high, then the parallel composition
                 of $P$ and $Q$ satisfies $p\leadsto q$ if $P$ raises
                 $a$ long enough. The point is that unlike the usual
                 composition of e.g. self-stabilizing algorithms
                 (like in \cite{Herman:1991:ATD,Dolev:1993:SDS} and
                 also \cite{Gouda:1991:SCP}) the component $Q$ may
                 interfere with $P$ at some times (but only after
                 $P$ has reached progress). The composition law is
                 applied in an example where fault-tolerance is
                 achieved through exception handling. The framework
                 is built on top of UNITY \cite{Chandy:1988:PPD} and
                 checked using HOL.}
}


@Article{Randell:2000:TML,
  author =       "Brian Randell",
  title =        "{Turing Memorial Lecture}: Facing Up to Faults",
  journal =      j-COMP-J,
  volume =       "43",
  number =       "2",
  pages =        "95--106",
  year =         "2000",
  url =          "http://www3.oup.co.uk/computer_journal/hdb/Volume_43/Issue_02/430095.sgm.abs.html;
                 http://www3.oup.co.uk/computer_journal/hdb/Volume_43/Issue_02/430095.pdf",
  annote = "A wise and cunning look back at the central problems in
     fault tolerance from the viewpoint of one of the big
     men. Mentions Babbage's concern about correct mathematical
     navigation tables (see also \cite{Bowen:1993:SCS}) and his first
     ideas of n version programming.  Looks on the necessity of
     fault-tolerant computing (``the more dependable computing systems
     become, the more dependence is placed on them''). Recalls
     concepts from \cite{Laprie:1992:DBC} and explicitly notes that
     the quality of fault-tolerance depends heavily on the quality of
     the fault assumption (p.100).  Quote: ``Yet all too often,
     inadequate attention is paid to identifying and justifying a set
     of fault assumptions''.  Notes the problems with feature
     interaction and non-interference when it comes to
     compositionality. Quote: ``All fault tolerance involves the use
     of redundancy---of representation and/or activity---whose
     consistency can be checked.'' Notes that notions of diversity are
     not very well understood and that ad hoc standards in operating
     systems are a problem when it comes to fault tolerance through
     system diversity."
}


@article{Schoder:2000:TOR,
  author          = {Detlef Schoder and Torsten Eymann},
  title           = {Technical opinion: The real challenges of mobile
                     agents},
  journal         = j-CACM,
  volume          = {43},
  number          = {6},
  pages           = {111--112},
  month           = jun,
  year            = {2000},
  coden           = {CACMA2},
  ISSN            = {0001-0782},
  bibdate         = {Mon Sep 25 15:22:32 MDT 2000},
  url             = {http://www.acm.org/pubs/citations/journals/cacm/2000-43-6/p111-schoder/},
  acknowledgement = ack-nhfb,
  subject         = {Computer Systems Organization ---
                     Computer-Communication Networks --- General (C.2.0);
                     Computer Systems Organization ---
                     Computer-Communication Networks --- Distributed Systems
                     (C.2.4); Computing Methodologies --- Artificial
                     Intelligence --- Distributed Artificial Intelligence
                     (I.2.11)},
  annote          = {states that mobile agents should have a kind of
                     self-stabilizing social behavior. Contrasts nicely to
                     \cite{Lange:1999:SGR}.}
}

@Article{Schulzki:2000:KI,
  author  = {Christiane Schulzki-Haddouti},
  title   = {{Kritische Infrastrukturen}},
  journal = {FIfF Kommunikation},
  year    = {2000},
  number  = {3},
  pages   = {19--20},
  month   = sep,
  annote  = {Teil des Sonderheftes \cite{Bernhardt:2000:RDR}.}
}


@Article{Schumacher:2000:AI,
  author  = {M. Schumacher and M. L. Moschgath and U. Roedig},
  title   = {{Angewandte Informationssicherheit} --- {Ein
             Hacker-Praktikum an Universit\"aten}},
  journal = {Informatik Spektrum},
  year    = {2000},
  volume  = {23},
  number  = {3},
  pages   = {202--211},
  month   = jun,
  annote  = {Presents an interesting course taught at TU Darmstadt:
             Students had to attack and defend a network of PCs to
             learn the practices of ``real'' network security.}
}

@InProceedings{Stoller:2000:EDG,
  author    = {Scott D. Stoller and Leena Unnikrishnan and Yanhong A. Liu},
  title     = {Efficient detection of global properties in distributed
               systems using partial-order methods},
  booktitle = {Computer Aided Verification (CAV 2000)},
  year      = {2000},
  volume    = {1855},
  series    = ser-LNCS,
  publisher = pub-SV,
  annote    = {Uses the ``persistent-set technique'' (a method known
    from partial order research to optimize state space search) to
    detect possibly and definitely in distributed computations. The
    algorithm is compared to two special case algorithms by Garg
    and Waldecker and it is shown to (a) handle a larger class of
    predicates, and (b) have the same worst case asymptotic time
    complexity. Results are backed by simulation data.}
}

@book{Tel:2000:IDA,
  author    = {Gerard Tel},
  title     = {Introduction to Distributed Algorithms},
  edition   = {Second},
  publisher = {Cambridge University Press},
  year      = {2000}
}



@inproceedings{Verissimo:2000:TCB,
  author      = {Paulo Ver\'{\i}ssimo and Antonio Casimiro and Christof Fetzer},
  title       = {The Timely Computing Base: Timely Actions in the Presence
                 of Uncertain Timeliness},
  booktitle   = {Proceedings of the International Conference on
                 Dependable Systems and Networks},
  pages       = {533--542},
  publisher   = {IEEE Computer Society Press},
  address     = {New York City, USA},
  month       = jun,
  year        = {2000},
  abstractURL = {http://www.di.fc.ul.pt/~casim/papers/dsn00/dsn00.html},
  documentURL = {http://www.di.fc.ul.pt/~casim/papers/dsn00/dsn00.ps.gz},
  annote      = {[to read]}
}

@phdthesis{Voelzer:2000:FRK,
  author = {Hagen {V\"olzer}},
  title  = {{Fairness, Randomisierung und Konspiration in
            verteilten Algorithmen}},
  school = {Humboldt Universit\"at zu Berlin, Fakult\"at f\"ur
            Informatik},
  month  = dec,
  year   = {2000},
  url    = {\url{http://dochost.rz.hu-berlin.de/abstract.php3/dissertationen/voelzer-hagen-2000-12-08}}
}


@Article{Wang:2000:PDA,
  author =       "Wenli Wang and Zolt{\'a}n Hidv{\'e}gi and Andrew D.
                 {Bailey, Jr.} and Andrew B. Whinston",
  title =        "{E}-Process Design and Assurance Using Model
                 Checking",
  journal =      "Computer",
  volume =       "33",
  number =       "10",
  pages =        "48--53",
  month =        oct,
  year =         "2000",
  url =          "http://www.computer.org/computer/co2000/rx048abs.htm;
                 http://dlib.computer.org/co/books/co2000/pdf/rx048.pdf",
  abstract =     "Using a simple online ticket sales example, the
                 authors demonstrate that model checking can help
                 businesses verify their e-processes.",
  annote =       "Shows that with model checking you can do model
                 checking. Nothing particular to e-commerce or
                 security (unfortunately)."
}





@book{Bergstra:2001:HPA,
  editor    = {Jan A. Bergstra and Alban Ponse and Scott A. Smolka},
  title     = {Handbook of Process Algebra},
  publisher = {North-Holland},
  year      = {2001},
  OPTannote = {schon da?}
}

@article{Furse:2001:DTW,
  author  = {Cynthia Furse and Randy Haupt},
  title   = {Down to the Wire},
  journal = {IEEE Spectrum},
  volume  = {38},
  number  = {2},
  pages   = {34--39},
  month   = feb,
  year    = {2001},
  annote  = {Drastic feature about the risks of aging wiring in
    aircraft. Airplanes stay in use for more than 20 years and so
    many parts are in danger of failing because of age. Especially
    wires are critical because the aircraft is full of them and they
    cannot be easily replaced. Studies show that in 20+ years old
    aircraft there is between 1.6 and 13 cracks per 1000 meter
    wires (there are about 240 km of wire in a Lockheed L-1011).
    Similar things count for military jets which stay in operation
    much longer (B-52s for example for 80 years). Faults can lead
    to sparks, fire, information loss, transient communication loss.
    Diagnosis tools are already good, but what is needed is
    prognosis.}
}

@phdthesis{Gaertner:2001:FGF,
  author = {Felix C. {G\"artner}},
  title  = {Formale Grundlagen der Fehlertoleranz in verteilten
            Systemen},
  school = {Fachbereich Informatik, TU Darmstadt},
  month  = may,
  year   = {2001},
  note   = {},
  url    = {\url{http://elib.tu-darmstadt.de/diss/000162/}}
}



@inproceedings{Gaertner:2001:DRF,
  author    = {Felix C. G\"artner and Hagen V\"olzer},
  title     = {Defining Redundancy in Fault-Tolerant Computing},
  booktitle = {Brief Announcement at the 15th International
               Symposium on DIStributed Computing (DISC 2001)},
  address   = {Lisbon, Portugal},
  month     = oct,
  year      = {2001}
}



@techreport{Gaertner:2001:GIF,
  author      = {Felix C. G\"artner},
  title       = {A gentle introduction to failure detectors and related
                 problems},
  institution = {Darmstadt University of Technology, Department of
                 Computer Science},
  number      = {TUD-BS-2001-01},
  month       = apr,
  year        = {2001},
  url         = {\url{http://www.informatik.tu-darmstadt.de/BS/Gaertner/publications/TUD-BS-2001-01.ps.gz}},
  annote      = {A more informal introduction to defining and using
                 unreliable failure detectors \cite{Chandra:1996:UFD}
                 in the design and analysis of fault tolerant distributed
                 algorithms.}
}

@inproceedings{Gaertner:2001:IPD,
  author    = {Felix C. G\"artner and Stefan Pleisch},
  title     = {{(Im)Possibilities} of predicate detection in
               crash-affected systems},
  booktitle = {Proceedings of the 5th Workshop on Self-Stabilizing Systems
               (WSS2001)},
  series    = ser-LNCS,
  number    = {2194},
  pages     = {98--113},
  publisher = pub-SV,
  address   = {Lisbon, Portugal},
  month     = oct,
  year      = {2001},
  note      = {}
}




@techreport{Gaertner:2001:IPDIBM,
  author      = {Felix C. {G\"artner} and Stefan Pleisch},
  title       = {{(Im)Possibilities} of Predicate Detection in Crash-Affected Systems},
  institution = {IBM Research Laboratory, Zurich},
  type        = {Research Report},
  number      = {RZ 3361 (\# 93407)},
  address     = {},
  month       = aug,
  year        = {2001},
  url         = {\url{http://domino.watson.ibm.com/library/CyberDig.nsf/Search}}
}





@Misc{LeLann:2001:ART,
  author       = {{G\'erard} {Le Lann}},
  title        = {Is asynchronous real-time an oxymoron?},
  howpublished = {Invited presentation at the 15th International
                  Symposium on DIStributed Computing (DISC 2001)},
  month        = oct,
  year         = {2001},
  note         = {Lisbon, Portugal},
  OPTannote    = {related reference is \cite{LeLann:1995:ORN}. Is this
                  published anywhere?}
}



@InProceedings{Aguilera:2002:OIF,
  author    = {Marcos K. Aguilera and {G\'erard} {Le Lann} and Sam Toueg},
  title     = {On the impact of fast failure detectors on real-time
               fault-tolerant systems},
  booktitle = {Proceedings of the 16th International
               Symposium on DIStributed Computing (DISC 2002)},
  crossref  = {Malkhi:2002:DC},
  pages     = {354--369},
  year      = {2002},
  editor    = {Dahlia Malkhi},
  number    = {2508},
  series    = ser-LNCS,
  address   = {Toulouse, France},
  month     = oct,
  publisher = pub-SV
}


@InProceedings{Gaertner:2002:FDS,
  author    = {Felix C. {G\"artner} and Stefan Pleisch},
  title     = {Failure detection sequencers: {Necessary} and sufficient
               information about failures to solve predicate detection},
  booktitle = {Proceedings of the 16th International
               Symposium on DIStributed Computing (DISC 2002)},
  crossref  = {Malkhi:2002:DC},
  pages     = {280--294},
  year      = {2002},
  editor    = {Dahlia Malkhi},
  number    = {2508},
  series    = ser-LNCS,
  address   = {Toulouse, France},
  month     = oct,
  publisher = pub-SV
}


@techreport{Gaertner:2002:FDSIBM,
  author      = {Felix C. {G\"artner} and Stefan Pleisch},
  title       = {Failure detection sequencers: {Necessary} and sufficient
                 information about failures to solve predicate detection},
  institution = {IBM Research Laboratory, Zurich},
  type        = {Research Report},
  number      = {RZ 3438},
  address     = {},
  OPTmonth    = aug,
  year        = {2002},
  url         = {\url{http://domino.watson.ibm.com/library/CyberDig.nsf/Search}}
}




@techreport{Gaertner:2002:RLPCSS,
  author      = {Felix C. {G\"artner}},
  title       = {Revisiting Liveness Properties in the Context of Secure
                 Systems},
  institution = {Swiss Federal Institute of Technology (EPFL), School of
                 Computer and Communication Sciences},
  number      = {200278},
  address     = {Lausanne, Switzerland},
  month       = nov,
  year        = {2002}
}

@article{Guerraoui:2002:NBA,
  author  = {Rachid Guerraoui},
  title   = {Non-Blocking Atomic Commitment in Asynchronous Systems
             with Failure Detectors},
  journal = j-DC,
  volume  = {15},
  number  = {1},
  year    = {2002}
}

@InProceedings{Guerraoui:2002:WFD,
  author    = {Rachid Guerraoui and Petr Kouznetsov},
  title     = {On the weakest failure detector for non-blocking
               atomic commit},
  booktitle = {Proceedings of the International Conference on Theoretical
               Computer Science (TCS 2002), 17th IFIP World Computer
               Congress},
  year      = 2002,
  address   = {Montreal, Canada},
  month     = aug
}




@Article{Hermant:2002:FAU,
  author  = {{Jean-Fran\c{c}ois} Hermant and {G\'erard} {Le Lann}},
  title   = {Fast asynchronous uniform consensus in real-time
             distributed systems},
  journal = j-IEEE-TRANS-COMP,
  year    = {2002},
  volume  = {51},
  number  = {8},
  pages   = {931--944},
  month   = aug,
  annote  = {A very relevant paper regarding the practicality of
    the failure detector approach. The basic idea of the paper is to
    use the principle of `late binding' (known from programming
    languages) to build real-time distributed protocols from
    asynchronous solutions for the `time-free' version of the
    problem. The approach is as follows: for a real-time problem, (1)
    turn the specification into a time-free problem (e.g. by basing
    timeliness requirements on certain activation conditions using
    time-free extensions to the asynchronous model - like failure
    detectors), then devise an asynchronous solution, (2) design a
    solution to the time-free extension in an as weak partially
    synchronous model as possible, (3) if the original problem is a
    real-time problem or in case one needs to predict real-time
    behavior, bind the parameters of the time-free extension to some
    possibly stronger partially synchronous model and establish time
    bounds for the extension, from that establish time bounds for the
    overall algorithm. Why is late binding good? First of all,
    devising solutions in this way results in systems that satisfy
    safety and liveness with the highest amount of coverage possible
    under the fault assumption (the coverage of the asynchronous model
    - because it makes no assumption - is higher than any (partially)
    synchronous model). Second, early binding of a solution makes you
    have to reason about timing and scheduling even if the original
    problem is not a real-time computing problem. The paper shows how
    late binding can be done using uniform consensus based on a strong
    failure detector (using which algorithm?), implementing the
    failure detector in a real-time Ethernet, and from that deriving a
    fast uniform consensus algorithm. This approach also has the
    advantage that failure detection has expedited delivery and so the
    failure detection time can be magnitudes smaller than regular
    message delivery (see also \cite{Aguilera:2002:OIF}). The
    timed-asynchronous (TA) system model \cite{Cristian:1999:TAD} and
    the timely computing base (TCB) \cite{Verissimo:2000:TCB} all do
    early binding. These models try then to enforce timing assumptions
    by what here is called ``measure-compare-and-kill'' (similar to
    the ``process controlled crash'' explained in
    \cite[p.14]{Defago:2000:ARP} used in ISIS and other systems). This
    means that a continuing timing failure detection takes place and
    that late services are treated as omissions, and it assumes that
    every timing failure is detected to maintain the confidence in the
    correctness. However, this means to perform scheduling and
    real-time analyses almost everywhere in the system, which can be
    tough. If these bounds are violated, the system might even lose
    liveness. The paper concludes (rather strongly) that ``TA and TCB
    lead to inefficient working solutions.'' Overall, this paper is
    both conceptual and technical (with a lot of real-time stuff) and
    argues strongly for its points.  Some material presented at DISC
    2001 \cite{LeLann:2001:ART}.}
}

%% Conference paper, LCTES/SCOPES 2002.  Fields carrying data come first;
%% the OPT-prefixed names are unused placeholders that BibTeX styles ignore.
@InProceedings{Jhumka:2002:SDC,
  author          = {Arshad Jhumka and Martin Hiller and Vilgot Claesson and
                     Neeraj Suri},
  title           = {On systematic design of consistent executable assertions
                     for distributed embedded software},
  booktitle       = {Proceedings of the ACM Joint Conference on Languages,
                     Compilers and Tools for Embedded Systems/Software
                     and Compilers for Embedded Systems (LCTES/SCOPES)},
  year            = {2002},
  pages           = {74--83},
  OPTcrossref     = {},
  OPTkey          = {},
  OPTeditor       = {},
  OPTvolume       = {},
  OPTnumber       = {},
  OPTseries       = {},
  OPTaddress      = {},
  OPTmonth        = {},
  OPTorganization = {},
  OPTpublisher    = {},
  OPTnote         = {},
  OPTannote       = {}
}





%% EPFL technical report, September 2002.  The umlaut in the second
%% author's surname is written as a TeX accent escape so the entry stays
%% pure ASCII, as the file header's codetable declares, and matches the
%% spelling used for the same author elsewhere in this file.
@TechReport{Jhumka:2002:OSD,
  author = 	 {Arshad Jhumka and Felix C. {G\"artner} and  
                  Christof Fetzer and Neeraj Suri},
  title = 	 {On Systematic Design of Fast and Perfect Detectors},
  institution =  {Swiss Federal Institute of Technology (EPFL), School of
                  Computer and Communication Sciences},
  year = 	 {2002},
  OPTkey = 	 {},
  OPTtype = 	 {},
  number = 	 {200263},
  address = 	 {Lausanne, Switzerland},
  month = 	 sep,
  OPTnote = 	 {},
  OPTannote = 	 {}
}





%% Conference paper, ICDCS 2002.  The publisher is given through the
%% pub-IEEE string macro, which has to be defined elsewhere in the
%% bibliography; OPT-prefixed fields are unused placeholders.
@InProceedings{Kulkarni:2002:CAF,
  author          = {Sandeep S. Kulkarni and A. Ebnenasir},
  title           = {Complexity of adding failsafe fault-tolerance},
  booktitle       = {Proceedings of the 22nd IEEE International Conference
                     on Distributed Computing Systems (ICDCS 2002)},
  publisher       = pub-IEEE,
  year            = {2002},
  month           = jul,
  pages           = {337--344},
  OPTcrossref     = {},
  OPTkey          = {},
  OPTeditor       = {},
  OPTvolume       = {},
  OPTnumber       = {},
  OPTseries       = {},
  OPTaddress      = {},
  OPTorganization = {},
  OPTnote         = {},
  OPTannote       = {}
}



%% Edited volume for DISC 2002, number 2508 of the series named by the
%% ser-LNCS string macro; publisher comes from the pub-SV macro.  Both
%% macros have to be defined elsewhere in the bibliography.
@Book{Malkhi:2002:DC,
  editor     = {Dahlia Malkhi},
  title      = {Distributed Computing. 16th International Conference 
                (DISC 2002)},
  series     = ser-LNCS,
  number     = {2508},
  publisher  = pub-SV,
  address    = {Toulouse, France},
  year       = {2002},
  month      = oct,
  OPTkey     = {},
  OPTvolume  = {},
  OPTedition = {},
  OPTnote    = {},
  OPTannote  = {}
}

%% PhD thesis, TU Darmstadt, November 2002.  The url field stores the
%% bare address: styles that print URLs add the \url wrapper themselves,
%% so wrapping it here would nest the command.  The unused note field is
%% kept as an OPT placeholder like the other unused fields.
@PhdThesis{Muehl:2002:FGF,
  author     = {Gero {M\"uhl}},
  title      = {Large-Scale Content-Based Publish-Subscribe Systems},
  school     = {Fachbereich Informatik, TU Darmstadt},
  year       = {2002},
  month      = nov,
  url        = {http://elib.tu-darmstadt.de/diss/000274/},
  OPTkey     = {},
  OPTtype    = {},
  OPTaddress = {},
  OPTnote    = {},
  OPTannote  = {}
}


%% Journal article, volume 46 number 1 (2003).  The journal name comes
%% from the j-COMP-J string macro, which has to be defined elsewhere in
%% the bibliography; no page range is recorded for this entry.
@Article{Pagnia:2003:FE,
  author    = {Henning Pagnia and Holger Vogt and Felix C. {G\"artner}},
  title     = {Fair Exchange},
  journal   = j-COMP-J,
  volume    = {46},
  number    = {1},
  year      = {2003},
  OPTkey    = {},
  OPTpages  = {},
  OPTmonth  = {},
  OPTnote   = {},
  OPTannote = {}
}

%% Announced but unpublished work: no year is recorded, so the citation
%% tag carries no year component and howpublished says "Forthcoming".
@Misc{Hadzilacos:FFT,
  author       = {Vassos Hadzilacos and Prasad Jayanti and Sam Toueg},
  title        = {Fundamentals of Fault-Tolerant Distributed Computing},
  howpublished = {Forthcoming},
  annote       = {Referenced in \cite{Hadzilacos:1994:MAF} but
                  obviously has not been published yet.},
  OPTcrossref  = {},
  OPTkey       = {},
  OPTyear      = {},
  OPTmonth     = {},
  OPTnote      = {}
}

%% Project web page.  There is no author, so the key field is supplied
%% for styles that need something to sort and label the entry by.
@Misc{MAFTIA,
  key          = {MAFTIA},
  title        = {MAFTIA Home -- {Malicious- and Accidental-Fault Tolerance for
                  Internet Applications}},
  howpublished = {Internet: 
                  \url{http://www.newcastle.research.ec.org/maftia/}},
  OPTauthor    = {},
  OPTyear      = {},
  OPTmonth     = {},
  OPTnote      = {},
  OPTannote    = {}
}