-
Notifications
You must be signed in to change notification settings - Fork 0
/
elizabot.js
391 lines (365 loc) · 11.2 KB
/
elizabot.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
/*
elizabot.js v.1.1 - ELIZA JS library (N.Landsteiner 2005)
Eliza is a mock Rogerian psychotherapist.
Original program by Joseph Weizenbaum in MAD-SLIP for "Project MAC" at MIT.
cf: Weizenbaum, Joseph "ELIZA - A Computer Program For the Study of Natural Language
Communication Between Man and Machine"
in: Communications of the ACM; Volume 9 , Issue 1 (January 1966): p 36-45.
JavaScript implementation by Norbert Landsteiner 2005; <http://www.masserk.at>
synopsis:
new ElizaBot( <random-choice-disable-flag> )
ElizaBot.prototype.transform( <inputstring> )
ElizaBot.prototype.getInitial()
ElizaBot.prototype.getFinal()
ElizaBot.prototype.reset()
usage: var eliza = new ElizaBot();
var initial = eliza.getInitial();
var reply = eliza.transform(inputstring);
if (eliza.quit) {
// last user input was a quit phrase
}
// method `transform()' returns a final phrase in case of a quit phrase
// but you can also get a final phrase with:
var final = eliza.getFinal();
// other methods: reset memory and internal state
eliza.reset();
// to set the internal memory size override property `memSize':
eliza.memSize = 100; // (default: 20)
// to reproduce the example conversation given by J. Weizenbaum
// initialize with the optional random-choice-disable flag
var originalEliza = new ElizaBot(true);
`ElizaBot' is also a general chatbot engine that can be supplied with any rule set.
(for required data structures cf. "elizadata.js" and/or see the documentation.)
data is parsed and transformed for internal use at the creation time of the
first instance of the `ElizaBot' constructor.
vers 1.1: lambda functions in RegExps are currently a problem with too many browsers.
changed code to work around.
*/
function ElizaBot(noRandomFlag) {
this.noRandom= (noRandomFlag)? true:false;
this.capitalizeFirstLetter=true;
this.debug=false;
this.memSize=20;
this.version="1.1 (original)";
if (!this._dataParsed) this._init();
this.reset();
}
ElizaBot.prototype.reset = function() {
this.quit=false;
this.mem=[];
this.lastchoice=[];
for (var k=0; k<elizaKeywords.length; k++) {
this.lastchoice[k]=[];
var rules=elizaKeywords[k][2];
for (var i=0; i<rules.length; i++) this.lastchoice[k][i]=-1;
}
}
ElizaBot.prototype._dataParsed = false;
ElizaBot.prototype._init = function() {
// install ref to global object
var global=ElizaBot.prototype.global=self;
// parse data and convert it from canonical form to internal use
// prodoce synonym list
var synPatterns={};
if ((global.elizaSynons) && (typeof elizaSynons == 'object')) {
for (var i in elizaSynons) synPatterns[i]='('+i+'|'+elizaSynons[i].join('|')+')';
}
// check for keywords or install empty structure to prevent any errors
if ((!global.elizaKeywords) || (typeof elizaKeywords.length == 'undefined')) {
elizaKeywords=[['###',0,[['###',[]]]]];
}
// 1st convert rules to regexps
// expand synonyms and insert asterisk expressions for backtracking
var sre=/@(\S+)/;
var are=/(\S)\s*\*\s*(\S)/;
var are1=/^\s*\*\s*(\S)/;
var are2=/(\S)\s*\*\s*$/;
var are3=/^\s*\*\s*$/;
var wsre=/\s+/g;
for (var k=0; k<elizaKeywords.length; k++) {
var rules=elizaKeywords[k][2];
elizaKeywords[k][3]=k; // save original index for sorting
for (var i=0; i<rules.length; i++) {
var r=rules[i];
// check mem flag and store it as decomp's element 2
if (r[0].charAt(0)=='$') {
var ofs=1;
while (r[0].charAt[ofs]==' ') ofs++;
r[0]=r[0].substring(ofs);
r[2]=true;
}
else {
r[2]=false;
}
// expand synonyms (v.1.1: work around lambda function)
var m=sre.exec(r[0]);
while (m) {
var sp=(synPatterns[m[1]])? synPatterns[m[1]]:m[1];
r[0]=r[0].substring(0,m.index)+sp+r[0].substring(m.index+m[0].length);
m=sre.exec(r[0]);
}
// expand asterisk expressions (v.1.1: work around lambda function)
if (are3.test(r[0])) {
r[0]='\\s*(.*)\\s*';
}
else {
m=are.exec(r[0]);
if (m) {
var lp='';
var rp=r[0];
while (m) {
lp+=rp.substring(0,m.index+1);
if (m[1]!=')') lp+='\\b';
lp+='\\s*(.*)\\s*';
if ((m[2]!='(') && (m[2]!='\\')) lp+='\\b';
lp+=m[2];
rp=rp.substring(m.index+m[0].length);
m=are.exec(rp);
}
r[0]=lp+rp;
}
m=are1.exec(r[0]);
if (m) {
var lp='\\s*(.*)\\s*';
if ((m[1]!=')') && (m[1]!='\\')) lp+='\\b';
r[0]=lp+r[0].substring(m.index-1+m[0].length);
}
m=are2.exec(r[0]);
if (m) {
var lp=r[0].substring(0,m.index+1);
if (m[1]!='(') lp+='\\b';
r[0]=lp+'\\s*(.*)\\s*';
}
}
// expand white space
r[0]=r[0].replace(wsre, '\\s+');
wsre.lastIndex=0;
}
}
// now sort keywords by rank (highest first)
elizaKeywords.sort(this._sortKeywords);
// and compose regexps and refs for pres and posts
ElizaBot.prototype.pres={};
ElizaBot.prototype.posts={};
if ((global.elizaPres) && (elizaPres.length)) {
var a=new Array();
for (var i=0; i<elizaPres.length; i+=2) {
a.push(elizaPres[i]);
ElizaBot.prototype.pres[elizaPres[i]]=elizaPres[i+1];
}
ElizaBot.prototype.preExp = new RegExp('\\b('+a.join('|')+')\\b');
}
else {
// default (should not match)
ElizaBot.prototype.preExp = /####/;
ElizaBot.prototype.pres['####']='####';
}
if ((global.elizaPosts) && (elizaPosts.length)) {
var a=new Array();
for (var i=0; i<elizaPosts.length; i+=2) {
a.push(elizaPosts[i]);
ElizaBot.prototype.posts[elizaPosts[i]]=elizaPosts[i+1];
}
ElizaBot.prototype.postExp = new RegExp('\\b('+a.join('|')+')\\b');
}
else {
// default (should not match)
ElizaBot.prototype.postExp = /####/;
ElizaBot.prototype.posts['####']='####';
}
// check for elizaQuits and install default if missing
if ((!global.elizaQuits) || (typeof elizaQuits.length == 'undefined')) {
elizaQuits=[];
}
// done
ElizaBot.prototype._dataParsed=true;
}
ElizaBot.prototype._sortKeywords = function(a,b) {
// sort by rank
if (a[1]>b[1]) return -1
else if (a[1]<b[1]) return 1
// or original index
else if (a[3]>b[3]) return 1
else if (a[3]<b[3]) return -1
else return 0;
}
ElizaBot.prototype.transform = function(text) {
var rpl='';
this.quit=false;
// unify text string
text=text.toLowerCase();
text=text.replace(/@#\$%\^&\*\(\)_\+=~`\{\[\}\]\|:;<>\/\\\t/g, ' ');
text=text.replace(/\s+-+\s+/g, '.');
text=text.replace(/\s*[,\.\?!;]+\s*/g, '.');
text=text.replace(/\s*\bbut\b\s*/g, '.');
text=text.replace(/\s{2,}/g, ' ');
// split text in part sentences and loop through them
var parts=text.split('.');
for (var i=0; i<parts.length; i++) {
var part=parts[i];
if (part!='') {
// check for quit expression
for (var q=0; q<elizaQuits.length; q++) {
if (elizaQuits[q]==part) {
this.quit=true;
return this.getFinal();
}
}
// preprocess (v.1.1: work around lambda function)
var m=this.preExp.exec(part);
if (m) {
var lp='';
var rp=part;
while (m) {
lp+=rp.substring(0,m.index)+this.pres[m[1]];
rp=rp.substring(m.index+m[0].length);
m=this.preExp.exec(rp);
}
part=lp+rp;
}
this.sentence=part;
// loop trough keywords
for (var k=0; k<elizaKeywords.length; k++) {
if (part.search(new RegExp('\\b'+elizaKeywords[k][0]+'\\b', 'i'))>=0) {
rpl = this._execRule(k);
}
if (rpl!='') return rpl;
}
}
}
// nothing matched try mem
rpl=this._memGet();
// if nothing in mem, so try xnone
if (rpl=='') {
this.sentence=' ';
var k=this._getRuleIndexByKey('xnone');
if (k>=0) rpl=this._execRule(k);
}
// return reply or default string
return (rpl!='')? rpl : 'I am at a loss for words.';
}
ElizaBot.prototype._execRule = function(k) {
var rule=elizaKeywords[k];
var decomps=rule[2];
var paramre=/\(([0-9]+)\)/;
for (var i=0; i<decomps.length; i++) {
var m=this.sentence.match(decomps[i][0]);
if (m!=null) {
var reasmbs=decomps[i][1];
var memflag=decomps[i][2];
var ri= (this.noRandom)? 0 : Math.floor(Math.random()*reasmbs.length);
if (((this.noRandom) && (this.lastchoice[k][i]>ri)) || (this.lastchoice[k][i]==ri)) {
ri= ++this.lastchoice[k][i];
if (ri>=reasmbs.length) {
ri=0;
this.lastchoice[k][i]=-1;
}
}
else {
this.lastchoice[k][i]=ri;
}
var rpl=reasmbs[ri];
if (this.debug) alert('match:\nkey: '+elizaKeywords[k][0]+
'\nrank: '+elizaKeywords[k][1]+
'\ndecomp: '+decomps[i][0]+
'\nreasmb: '+rpl+
'\nmemflag: '+memflag);
if (rpl.search('^goto ', 'i')==0) {
ki=this._getRuleIndexByKey(rpl.substring(5));
if (ki>=0) return this._execRule(ki);
}
// substitute positional params (v.1.1: work around lambda function)
var m1=paramre.exec(rpl);
if (m1) {
var lp='';
var rp=rpl;
while (m1) {
var param = m[parseInt(m1[1])];
// postprocess param
var m2=this.postExp.exec(param);
if (m2) {
var lp2='';
var rp2=param;
while (m2) {
lp2+=rp2.substring(0,m2.index)+this.posts[m2[1]];
rp2=rp2.substring(m2.index+m2[0].length);
m2=this.postExp.exec(rp2);
}
param=lp2+rp2;
}
lp+=rp.substring(0,m1.index)+param;
rp=rp.substring(m1.index+m1[0].length);
m1=paramre.exec(rp);
}
rpl=lp+rp;
}
rpl=this._postTransform(rpl);
if (memflag) this._memSave(rpl)
else return rpl;
}
}
return '';
}
ElizaBot.prototype._postTransform = function(s) {
// final cleanings
s=s.replace(/\s{2,}/g, ' ');
s=s.replace(/\s+\./g, '.');
if ((this.global.elizaPostTransforms) && (elizaPostTransforms.length)) {
for (var i=0; i<elizaPostTransforms.length; i+=2) {
s=s.replace(elizaPostTransforms[i], elizaPostTransforms[i+1]);
elizaPostTransforms[i].lastIndex=0;
}
}
// capitalize first char (v.1.1: work around lambda function)
if (this.capitalizeFirstLetter) {
var re=/^([a-z])/;
var m=re.exec(s);
if (m) s=m[0].toUpperCase()+s.substring(1);
}
return s;
}
ElizaBot.prototype._getRuleIndexByKey = function(key) {
for (var k=0; k<elizaKeywords.length; k++) {
if (elizaKeywords[k][0]==key) return k;
}
return -1;
}
ElizaBot.prototype._memSave = function(t) {
this.mem.push(t);
if (this.mem.length>this.memSize) this.mem.shift();
}
ElizaBot.prototype._memGet = function() {
if (this.mem.length) {
if (this.noRandom) return this.mem.shift();
else {
var n=Math.floor(Math.random()*this.mem.length);
var rpl=this.mem[n];
for (var i=n+1; i<this.mem.length; i++) this.mem[i-1]=this.mem[i];
this.mem.length--;
return rpl;
}
}
else return '';
}
ElizaBot.prototype.getFinal = function() {
if (!ElizaBot.prototype.global.elizaFinals) return '';
return elizaFinals[Math.floor(Math.random()*elizaFinals.length)];
}
ElizaBot.prototype.getInitial = function() {
if (!ElizaBot.prototype.global.elizaInitials) return '';
return elizaInitials[Math.floor(Math.random()*elizaInitials.length)];
}
// fix array.prototype methods (push, shift) if not implemented (MSIE fix)
if (typeof Array.prototype.push == 'undefined') {
Array.prototype.push=function(v) { return this[this.length]=v; };
}
if (typeof Array.prototype.shift == 'undefined') {
Array.prototype.shift=function() {
if (this.length==0) return null;
var e0=this[0];
for (var i=1; i<this.length; i++) this[i-1]=this[i];
this.length--;
return e0;
};
}
// eof