     Copyright (C) 1996-1997 by Haruo Yokota,
     Japan Advanced Inst. of Sci. and Tech., Hokuriku
     1-1 Asahidai, Tatsunokuchi, Ishikawa 923-12, Japan.
---------------------------------------------------------------------------
	Fault-Tolerant Parallel Software Library

						on Apr. in 1997
						by Eiji Sugino (JAIST)

[Files] 

	README.j
	README.e
	README-j.html
	README-e.html

	watchdog.kl1	
	copy.kl1	

	ncube.kl1	% for nCUBE2
	sparc.kl1	% for SPARC 

        === samples ===
	----- sample 1 -----
	main.kl1	
	queen.kl1	
	ft_queen.kl1	
	ft_queen_faulty.kl1  
	----- sample 2 -----
	main-sim.kl1     
	f_sim.kl1	 
	f_sim_faulty.kl1 

[Function] 
	It executes programs such as the samples as fault-tolerant parallel software.

  distribution(Primary,Backup) : in watchdog.kl1 

	divides all nodes into two groups, and forks watchdog processes on
	each node.

  copy(Args,Args1,Args2,Interrupt) : in copy.kl1 

	replicates the arguments 'Args' into 'Args1' and 'Args2',
	and prepares forwarding processes for each variable.

[Usages]
	You should compile your program with 

		1) watchdog.kl1 and copy.kl1, and 
		2) ncube.kl1   or  sparc.kl1, depending on your system

	for example, 

	% klic -v -dp -o dofts watchdog.kl1 copy.kl1 sparc.kl1 main.kl1 ft_queen.kl1 

	% ./dofts -n -p5 7d 

	Attention)  You should run it on more than 2 nodes.

-----
[Example]
	This program is under construction, and we cannot complete it
	because the tracing facility on distributed KLIC is not available.
	For now, you can see only an example execution.

	1) compile it

	% make sparc

	2) do without any error 

	% ./dofts-s -p5 10 100d
	Leader [1]
	Leader [2]
	Backup Site Group  : 2 4 
	Primary Site Group : 1 3 
	30 [67,1]                     <== outputs A (30) and B ([67,1])
	Response time is 1359 msec

	3) do with an error

	amethyst[300]% ./dofts-sf -p5 10 100d
	Leader [1]
	Leader [2]
	Primary Site Group : 1 3 
	Backup Site Group  : 2 4 
	BOMB! 3					<== 'exit' is called on Node 3 
	30 FAULT was detected on PRIMARY SITE!	<== fault is detected after output 'A' 
	FAULT was informed to BACKUP!
	BACKUP change to PRIMARY !!! REBIRTH (1) [4]
	REBIRTH (2) [4]
	 ... REBIRTH ndet_replay [117,1]	<== outputs B after some messages
	^Ckill tasks from io_server		<== finish with ^C 

-------------------
Because the fault-tolerant conversion program is under construction,
you should write the fault-tolerant version by hand. Here we only provide
a guide for that programming.

[Guide for FATPAS]
		
 1)  Split the program into two parts: the host program and the fault-tolerant one.

	ex.  The original user program consists of main.kl1 and queen.kl1.
		'queen:queen(N,Result)'  is the target for FATPAS.

 2)  Convert the target program as follows.

   2-1) The top clause is converted as follows.

	queen(N,R) :-       % Original HEAD
		% fork watchdogs
	    watchdog:distribution(Primary,Backup),  
		% 
	    queen_1({N,R},Primary,Backup) @ lower_priority(10).  

	queen_1(Args,[PTop|Primary],[BTop|Backup]) :-  
		% replicate arguments
	    copy:copy(Args,Args1,Args2,Interrupt),  
		% merge the interrupt streams
	    Interrupt = {Interrupt1,Interrupt2}, Log = ack(Log1),
		% fork a top goal for each sites
	    PTop = {primary,queen,queen,Args1,Log,Signal,Interrupt1},
	    BTop = {backup ,queen,queen,Args2,Log1,Signal,Interrupt2}.

   2-2) Make clauses for the top goal in module 'exgoal'.

	Define the following clauses between the '=-=-=-=-' lines.

	:- module exgoal.

	call_goal(Site,Module,Predicate,Args,Log,GSig,Raise)-SC :- 
	    call_goal_0(Site,Module,Predicate,Args,Log,GSig,Raise)-SC @ lower_priority.

	% =-=-=-=-
	call_goal_0(primary,queen,queen,{A,B},Log,GSig,Raise)-SC :-
	    queen:queen_record(A,B,Log)+GSig+Raise-SC.
	call_goal_0(backup ,queen,queen,{A,B},Log,GSig,Raise)-SC :-
	    queen:queen_replay(A,B,Log)+GSig+Raise-SC.
	% =-=-=-=-
	otherwise.
	call_goal_0(Type,Module,Method,Arguments,Log,GSignal,Raise)-SC :-
	    klicio:klicio([stdout(normal(Out))]),
	    variable:wrap((Type::Module:Method/Arguments), G), 
	    Out = [fwrite("Illegal goal invocation : "),
		   putwt(G), nl,fflush(_)],
	    Raise = [].

    2-3)  Make clauses for the top goal in original module.

	(1) It's like Instant Replay conversion.
	
	% Record Version
	queen_record(N,X,Log) :- 
		current_node(_,All),
		queen_0_record(N,X,~(All-1),Log)@node(1).

	queen_0_record(4,X,A,Log) :- queen_record([1,2,3,4],[],[],X,A,Log).
	.... 

	queen_record([P|U],C,L,I,PE,Log) :- 
		Log = c1(Log1,Log2,Log3),
		TO:= (P mod PE)+1,
		throw_record(U,[P|C],L,I2,PE,TO,Log1),
 		merge_record(I1,I2,I,Log2),
		append(U,C,N),
		c1_record(P,1,N,L,L,I1,PE,Log3).

	% Replay Version
	queen_replay(N,X,Log) :- 
		current_node(_,All),
		queen_0_replay(N,X,~(All-1),Log)@node(1).

	queen_0_replay(4,X,A,Log) :- queen_replay([1,2,3,4],[],[],X,A,Log).
	.... 

	queen_replay([P|U],C,L,I,PE,Log) :- 
	    Log = c1(Log1,Log2,Log3) | 
		TO:= (P mod PE)+1,
		throw_replay(U,[P|C],L,I2,PE,TO,Log1),
 		merge_replay(I1,I2,I,Log2),
		append(U,C,N),
		c1_replay(P,1,N,L,L,I1,PE,Log3).

	(2) Add the following arguments to all user-defined predicates.

		GSig : carries interruptions from top to leaf.
			You should pass it on to sub-goals.
		Raise: carries signals from leaf to top.
			When a clause has several sub-goals,
			you should merge the signals of the sub-goals into one.
		SC:   is a variable pair for short-circuit detection.
			You only need to attach it to every goal.

	% Record Version
	queen_record(N,X,Log)+GSig+Raise-SC :- 
		current_node(_,All),
		queen_0_record(N,X,~(All-1),Log)+GSig+Raise-SC @node(1).

	queen_0_record(4,X,A,Log)+GSig+Raise-SC :- 
		queen_record([1,2,3,4],[],[],X,A,Log)+GSig+Raise-SC.
	.... 
	
	queen_record([P|U],C,L,I,PE,Log)+GSig+Raise-SC :- 
		Raise = {Raise1,Raise2,Raise3,Raise4},
		Log = c1(Log1,Log2,Log3),
		TO:= (P mod PE)+1,
		throw_record(U,[P|C],L,I2,PE,TO,Log1)+GSig+Raise1-SC,
 		merge_record(I1,I2,I,Log2)+GSig+Raise2-SC,
		append_record(U,C,N)+GSig+Raise3-SC,
		c1_record(P,1,N,L,L,I1,PE,Log3)+GSig+Raise4-SC.

	(3) In the Record Version, you should put a synchronization argument
		'ack(...)' in the head goal as follows.

	queen_record([P|U],C,L,I,PE,ack(Log))+GSig+Raise-SC :- 
		Raise = {Raise1,Raise2,Raise3,Raise4},
		Log = c1(Log1,Log2,Log3),
		TO:= (P mod PE)+1,
		throw_record(U,[P|C],L,I2,PE,TO,Log1)+GSig+Raise1-SC,
 		merge_record(I1,I2,I,Log2)+GSig+Raise2-SC,
		append_record(U,C,N)+GSig+Raise3-SC,
		c1_record(P,1,N,L,L,I1,PE,Log3)+GSig+Raise4-SC.
	queen_record([],[_|_],_,I,ack(Log))+Sig+Raise-SC:-
		Raise=[],
		Log=c2(Ack), 
		I=[].

	(4) In the Record Version, you should put a synchronization goal
		'output(...)' in place of body unification as follows.

	queen_record([],[_|_],_,I,ack(Log))+Sig+Raise-SC:-
		Raise=[],
		Log=c2(Ack), 
		output(Ack,I, [])-SC.
	     
	output(Ack,X,Y)-SC :- wait(Ack) | X = Y. 

	(5) In Replay Version, you should put a clause for interruption check.

	% Replay Version
	queen_replay(A,   B,   Log)+GSig+Raise-SC :-
	    (wait(Log) -> queen_replay_0(A, B, Log)+GSig+Raise-SC ;
	     alternatively;
	     GSig = [rebirth|GSig1] -> queen_record(A, B, _)+GSig1+Raise-SC).

	queen_replay_0(N,X,Log)+GSig+Raise-SC :- 
		current_node(_,All),
		queen_0_replay(N,X,~(All-1),Log)+GSig+Raise-SC @node(1).

	queen_0_replay( 4,X,A,Log)+GSig+Raise-SC :- 
		queen_replay([1,2,3,4],[],[],X,A,Log)+GSig+Raise-SC.
	....

	queen_replay([P|U],C,L,I,PE,Log)+GSig+Raise-SC :- 
	   Log = c1(Log1,Log2,Log3) |
		Raise = {Raise1,Raise2,Raise3,Raise4},
		TO:= (P mod PE)+1,
		throw_replay(U,[P|C],L,I2,PE,TO,Log1)+GSig+Raise1-SC,
 		merge_replay(I1,I2,I,Log2)+GSig+Raise2-SC,
		append_replay(U,C,N)+GSig+Raise3-SC,
		c1_replay(P,1,N,L,L,I1,PE,Log3)+GSig+Raise4-SC.
	queen_replay([],[_|_],_,I,ack(Log))+Sig+Raise-SC:-
	    Log=c2(Ack) |
		Raise=[],
		I=[].

	(6) You should change throwing goals ('goal @ node(N)') into 
		unification goals as follows.

	throw_record(A,B,C,D,E,F,Log)+GSig+Raise-SC :- 
		Raise = [goal(primary,queen,queen,{A,B,C,D,E},Log)].
	throw_replay(A,B,C,D,E,F,Log)+GSig+Raise-SC :- 
		Raise = [goal(primary,queen,queen,{A,B,C,D,E},Log)].

	  In this version, goals are thrown to the neighboring node,
	so you need not add the destination node number.

	(7) You should add clauses for throwing goals in module 'exgoal'.

	call_goal_0(primary,queen,queen,{A,B,C,D,E},Log,GSig,Raise)-SC :-
	    queen:queen_record(A,B,C,D,E,Log)+GSig+Raise-SC.
	call_goal_0(backup ,queen,queen,{A,B,C,D,E},Log,GSig,Raise)-SC :-
	    queen:queen_replay(A,B,C,D,E,Log)+GSig+Raise-SC.
	....
	otherwise.
	call_goal_0(Type,Module,Method,Arguments,Log,GSignal,Raise)-SC :-
	    klicio:klicio([stdout(normal(Out))]),
	    variable:wrap((Type::Module:Method/Arguments), G), 
	    Out = [fwrite("Illegal goal invocation : "),
		   putwt(G), nl,fflush(_)],
	    Raise = [].

	Attention) You can get "ft_queen.kl1" after some optimization.
