1 files changed, 561 insertions, 0 deletions
diff --git a/appl/lib/rfc822.b b/appl/lib/rfc822.b
new file mode 100644
index 00000000..0f15a585
--- /dev/null
+++ b/appl/lib/rfc822.b
@@ -0,0 +1,561 @@
+implement RFC822;
+
+include "sys.m";
+	sys: Sys;
+
+include "bufio.m";
+	bufio: Bufio;
+	Iobuf: import bufio;
+	
+include "rfc822.m";
+
+include "string.m";
+	str: String;
+
+include "daytime.m";
+	daytime: Daytime;
+	Tm: import daytime;
+
+# initial size in bytes of the header buffer allocated by readheaders
+Minrequest: con 512;	# more than enough for most requests
+
+# one entry of SuffixFile, as produced by parsesuffix
+Suffix: adt {
+	suffix: string;		# first field of the line; compared with the ".ext" tail in suffixclass
+	generic: string;	# media type ("" when the file has "-")
+	specific: string;	# media subtype ("" when the file has "-")
+	encoding: string;	# content encoding ("" when the file has "-")
+};
+
+# file read by readsuffixfile to populate suffixes
+SuffixFile: con "/lib/mimetype";
+# NOTE(review): mtime and qid are not referenced by any code visible in this file
+mtime := 0;
+qid: Sys->Qid;
+
+# entries loaded by readsuffixfile; new entries are added at the head
+suffixes: list of ref Suffix;
+
+# raise an exception reporting that module s could not be loaded;
+# the %r verb appends the system error string to the message
+nomod(s: string)
+{
+	raise sys->sprint("internal: can't load %s: %r", s);
+}
+
+# initialise the module.
+# b is the Bufio module instance this module will use for all buffered I/O.
+# Loads Sys, String and Daytime; raises (via nomod) if String or Daytime
+# cannot be loaded.  Finally reads SuffixFile; the error string that
+# readsuffixfile may return is ignored here.
+init(b: Bufio)
+{
+	sys = load Sys Sys->PATH;
+	bufio = b;
+	str = load String String->PATH;
+	if(str == nil)
+		nomod(String->PATH);
+	daytime = load Daytime Daytime->PATH;
+	if(daytime == nil)
+		nomod(Daytime->PATH);
+	readsuffixfile();
+}
+
+# Read a block of header lines from fd, stopping after the empty line that
+# ends it or at end of input.
+# Returns one (name, value) pair per logical line, in input order:
+#  - CR LF is stored as a single '\n'; a CR not followed by LF is kept as '\r';
+#  - a line whose successor starts with space or tab is continued: its newline
+#    is overwritten by a space and that one white-space character is dropped;
+#  - name is the lower-cased text before the first ':' ("" if there is none);
+#    value is the bytes after that ':' (the whole line if there is none),
+#    including the trailing '\n' when one was read.
+# The terminating empty line, when seen, appears as a final ("", "\n") entry.
+# Returns nil if nothing was read, or if the buffer is full and is already
+# at least limit bytes long.
+readheaders(fd: ref Iobuf, limit: int): array of (string, array of byte)
+{
+	n := 0;		# number of bytes stored in b
+	s := 0;		# index in b where the current logical line starts
+	b := array[Minrequest] of byte;
+	nline := 0;
+	lines: list of array of byte;	# completed lines, most recent first
+	while((c := fd.getb()) >= 0){
+		if(c == '\r'){
+			# fold CR LF into LF; a lone CR is stored unchanged
+			c = fd.getb();
+			if(c < 0)
+				break;
+			if(c != '\n'){
+				fd.ungetb();
+				c = '\r';
+			}
+		}
+		if(n >= len b){
+			# buffer full: give up once it has reached limit, otherwise grow by 512 bytes
+			if(len b >= limit)
+				return nil;
+			ab := array[n+512] of byte;
+			ab[0:] = b;
+			b = ab;
+		}
+		b[n++] = byte c;
+		if(c == '\n'){
+			if(n == 1 || b[n-2] == byte '\n')
+				break;	# empty line
+			# look one byte ahead to see whether the next line continues this one
+			c = fd.getb();
+			if(c < 0)
+				break;
+			if(c != ' ' && c != '\t'){	# not continued
+				fd.ungetb();
+				lines = b[s: n] :: lines;
+				nline++;
+				s = n;
+			}else
+				b[n-1] = byte ' ';	# continuation: replace the stored newline by a space
+		}
+	}
+	if(n == 0)
+		return nil;
+	b = b[0: n];
+	# pick up whatever follows the last completed line (partial line or the empty line)
+	if(n != s){
+		lines = b[s:n] :: lines;
+		nline++;
+	}
+	a := array[nline] of (string, array of byte);
+	# lines is newest-first, so fill the result from the end backwards
+	for(; lines != nil; lines = tl lines){
+		b = hd lines;
+		name := "";
+		for(i := 0; i < len b; i++)
+			if(b[i] == byte ':'){
+				name = str->tolower(string b[0:i]);
+				b = b[i+1:];
+				break;
+			}
+		a[--nline] = (name, b);
+	}
+	return a;
+}
+
+#
+# parameter list, as used in transfer-extension, media-type and media-range:
+#	*(";" parameter)
+#	parameter = attribute "=" value
+#	attribute = token
+#	value = token | quoted-string
+#
+# Called with the leading ";" already consumed.  Collects (attribute, value)
+# pairs until a parameter is malformed or the token after a parameter is not
+# ";"; that last token is pushed back.  Pairs are returned in input order.
+#
+parseparams(ps: ref Rfclex): list of (string, string)
+{
+	acc: list of (string, string) = nil;
+	for(;;){
+		if(ps.lex() != Word)
+			break;
+		name := ps.wordval;
+		if(ps.lex() != '=')
+			break;
+		t := ps.lex();
+		if(t != Word && t != QString)
+			break;
+		acc = (name, ps.wordval) :: acc;
+		if(ps.lex() != ';')
+			break;
+	}
+	ps.unlex();
+	return rev(acc);
+}
+
+#
+# 1#transfer-coding
+#
+# Parses a comma-separated list of words, each optionally followed by
+# ";"-introduced parameters.  Returns (word, parameters) pairs in input
+# order; the token that ends the list is pushed back.
+#
+mimefields(ps: ref Rfclex): list of (string, list of (string, string))
+{
+	acc: list of (string, list of (string, string)) = nil;
+	for(;;){
+		if(ps.lex() == Word){
+			name := ps.wordval;
+			params: list of (string, string) = nil;
+			if(ps.lex() == ';'){
+				params = parseparams(ps);
+				ps.lex();	# fetch the token following the parameters
+			}
+			acc = (name, params) :: acc;
+		}
+		if(ps.tok != ',')
+			break;
+	}
+	ps.unlex();
+	# acc was built newest-first; put it back in input order
+	fields: list of (string, list of (string, string)) = nil;
+	while(acc != nil){
+		fields = hd acc :: fields;
+		acc = tl acc;
+	}
+	return fields;
+}
+
+#	#(media-type | (media-range [accept-params]))	; Content-Type and Accept
+#
+#	media-type = type "/" subtype *( ";" parameter )
+#	type = token
+#	subtype = token
+#	LWS must not be used between type and subtype, nor between attribute and value (in parameter)
+#
+#	media-range = ("*/*" | type "/*" | type "/" subtype ) *(";" parameter)
+#	accept-params = ";" "q" "=" qvalue *( accept-extension )
+#	accept-extension = ";" token [ "=" ( token | quoted-string ) ]
+#
+#	1#( ( charset | "*" )[ ";" "q" "=" qvalue ] )		; Accept-Charset
+#	1#( codings [ ";" "q" "=" qvalue ] )			; Accept-Encoding
+#	1#( language-range [ ";" "q" "=" qvalue ] )		; Accept-Language
+#
+#	codings = ( content-coding | "*" )
+#
+# Parses a comma-separated list of such items and prepends a Content for each
+# one onto head, which is returned.
+# If multipart is non-zero every item must have the form type "/" subtype.
+# If it is zero an item is a single word (specific defaults to "*"), and a "/"
+# is accepted only when the word after it is "*".
+# Parsing stops at the first item that breaks these rules or when the token
+# after an item is not ","; the current token is then pushed back.
+#
+parsecontent(ps: ref Rfclex, multipart: int, head: list of ref Content): list of ref Content
+{
+	do{
+		if(ps.lex() == Word){
+			generic := ps.wordval;
+			specific := "*";
+			if(ps.lex() == '/'){
+				if(ps.lex() != Word)
+					break;
+				specific = ps.wordval;
+				if(!multipart && specific != "*")
+					break;
+			}else if(multipart)
+				break;	# syntax error
+			else
+				ps.unlex();
+			# explicit nil: the declaration is re-entered on every iteration
+			params: list of (string, string) = nil;
+			if(ps.lex() == ';'){
+				params = parseparams(ps);
+				ps.lex();
+			}
+			head = Content.mk(generic, specific, params) :: head;	# order reversed, but doesn't matter
+		}
+	}while(ps.tok == ',');
+	ps.unlex();
+	return head;
+}
+
+# return a new list holding the pairs of l in reverse order
+rev(l: list of (string, string)): list of (string, string)
+{
+	out: list of (string, string) = nil;
+	while(l != nil){
+		out = hd l :: out;
+		l = tl l;
+	}
+	return out;
+}
+
+# create a lexer that reads from the byte array a.
+# tok starts as '\n' and eof as 0; no other fields are set here.
+Rfclex.mk(a: array of byte): ref Rfclex
+{
+	ps := ref Rfclex;
+	ps.fd = bufio->aopen(a);
+	ps.tok = '\n';
+	ps.eof = 0;
+	return ps;
+}
+
+# return the next byte of input; a negative result means no byte was
+# available, and ps.eof is then set so that ungetc becomes a no-op
+Rfclex.getc(ps: self ref Rfclex): int
+{
+	if((c := ps.fd.getb()) >= 0)
+		return c;
+	ps.eof = 1;
+	return c;
+}
+
+# push back the byte most recently read by getc;
+# does nothing once end of input has been recorded in ps.eof
+Rfclex.ungetc(ps: self ref Rfclex)
+{
+	if(!ps.eof)
+		ps.fd.ungetb();
+}
+
+# return the next token, leaving it in ps.tok (and its text, if any, in
+# ps.wordval).  Tokens pushed back with unlex are returned first, most
+# recently pushed first; otherwise a new token is scanned from the input.
+Rfclex.lex(ps: self ref Rfclex): int
+{
+	if(ps.seen == nil){
+		ps.tok = lex1(ps, 0);
+		return ps.tok;
+	}
+	(ps.tok, ps.wordval) = hd ps.seen;
+	ps.seen = tl ps.seen;
+	return ps.tok;
+}
+
+# push the current token (ps.tok, ps.wordval) back so that the next call
+# to lex returns it again
+Rfclex.unlex(ps: self ref Rfclex)
+{
+	ps.seen = (ps.tok, ps.wordval) :: ps.seen;
+}
+
+# skip white space and parenthesised comments, returning the first character
+# of what follows without consuming it ('\n' at a newline or end of input).
+# Calls lex1 directly: ps.tok is not updated and tokens pushed back with
+# unlex are not consulted.
+Rfclex.skipws(ps: self ref Rfclex): int
+{
+	return lex1(ps, 1);
+}
+
+#
+# rfc 2822/rfc 1521 lexical analyzer
+#
+# Scans one token from ps and returns it:
+#	'\n'	at a newline or at end of input
+#	the character itself for ) < > [ ] @ / , ; : ? =
+#	QString	for a double-quoted string, text in ps.wordval
+#	Word	for any other run of characters, text in ps.wordval
+# Spaces, tabs, carriage returns, NUL bytes and parenthesised comments are skipped.
+# ps.wordval is cleared on entry.
+# If skipwhite is non-zero nothing but that white space is consumed: the first
+# character of the following token is pushed back and returned as is.
+#
+lex1(ps: ref Rfclex, skipwhite: int): int
+{
+	ps.wordval = nil;
+	while((c := ps.getc()) >= 0){
+		case c {
+		 '(' =>
+			# comment: runs to the matching ')' (nesting is counted),
+			# or to a newline or end of input if it is not closed
+			level := 1;
+			while((c = ps.getc()) != Bufio->EOF && c != '\n'){
+				if(c == '\\'){
+					# backslash hides the next character from the comment scanner
+					c = ps.getc();
+					if(c == Bufio->EOF)
+						return '\n';
+					continue;
+				}
+				if(c == '(')
+					level++;
+				else if(c == ')' && --level == 0)
+					break;
+			}
+ 		' ' or '\t' or '\r' or 0 =>
+			;
+ 		'\n' =>
+			return '\n';
+		')' or '<' or '>' or '[' or ']' or '@' or '/' or ',' or
+		';' or ':' or '?' or '=' =>
+			if(skipwhite){
+				ps.ungetc();
+				return c;
+			}
+			return c;
+
+ 		'"' =>
+			if(skipwhite){
+				ps.ungetc();
+				return c;
+			}
+			word(ps,"\"");
+			ps.getc();		# skip the closing quote
+			return QString;
+
+ 		* =>
+			# start of a word: push the character back so word() sees it
+			ps.ungetc();
+			if(skipwhite)
+				return c;
+			word(ps,"\"()<>@,;:/[]?={}\r\n \t");
+			return Word;
+		}
+	}
+	return '\n';
+}
+
+# return the rest of an rfc 822 line, not including \r or \n
+# do not map to lower case
+#
+# A backslash is dropped and the character after it is taken literally.
+# On return ps.tok is '\n' and ps.wordval holds the text returned.
+# Reads straight from the input: tokens pushed back with unlex are neither
+# used nor discarded.
+
+Rfclex.line(ps: self ref Rfclex): string
+{
+	text := "";
+	for(;;){
+		ch := ps.getc();
+		if(ch == Bufio->EOF || ch == '\n' || ch == '\r')
+			break;
+		if(ch == '\\'){
+			ch = ps.getc();
+			if(ch == Bufio->EOF)
+				break;
+		}
+		text[len text] = ch;
+	}
+	ps.wordval = text;
+	ps.tok = '\n';
+	return text;
+}
+
+# collect characters from ps into ps.wordval until one of the characters in
+# stop is met (it is pushed back) or the input ends.
+#  - '\r' is treated as a space;
+#  - a backslash is dropped and the next character is taken literally,
+#    even if it is in stop;
+#  - 'A'-'Z' are mapped to lower case, escaped characters included.
+# NOTE(review): lex1 also uses this for quoted strings, so their contents are
+# lower-cased too - confirm that callers expect this.
+word(ps: ref Rfclex, stop: string)
+{
+	acc := "";
+	for(;;){
+		ch := ps.getc();
+		if(ch == Bufio->EOF)
+			break;
+		if(ch == '\r')
+			ch = ' ';
+		if(ch == '\\'){
+			ch = ps.getc();
+			if(ch == Bufio->EOF)
+				break;
+		}else if(str->in(ch, stop)){
+			ps.ungetc();
+			break;
+		}
+		if('A' <= ch && ch <= 'Z')
+			ch += 'a' - 'A';
+		acc[len acc] = ch;
+	}
+	ps.wordval = acc;
+}
+
+# (re)load the suffix table from SuffixFile.
+# Returns nil on success, or a diagnostic string if the file cannot be
+# opened, in which case the existing table is left untouched.
+# Lines that parsesuffix rejects are skipped silently (best effort).
+# The table is built afresh and then replaces suffixes, so calling this more
+# than once does not accumulate duplicate entries.  As before, entries end up
+# in reverse file order, so later lines of the file are matched first.
+readsuffixfile(): string
+{
+	iob := bufio->open(SuffixFile, Bufio->OREAD);
+	if(iob == nil)
+		return sys->sprint("cannot open %s: %r", SuffixFile);
+	fresh: list of ref Suffix = nil;
+	while((line := iob.gets('\n')) != nil){
+		(s, nil) := parsesuffix(line);
+		if(s != nil)
+			fresh = s :: fresh;
+	}
+	iob.close();
+	suffixes = fresh;
+	return nil;
+}
+
+# parse one line of SuffixFile:
+#	suffix generic specific encoding	# optional comment
+# Fields are separated by spaces, tabs or newlines; "-" in any field but the
+# first stands for "empty".
+# Returns (entry, nil) for a usable line, (nil, nil) for a line that is blank,
+# only a comment, or gives neither a complete type nor an encoding, and
+# (nil, message) for a line with fewer than four fields.
+parsesuffix(line: string): (ref Suffix, string)
+{
+	(text, nil) := str->splitstrl(line, "#");
+	if(text == nil)
+		return (nil, nil);
+	(nf, toks) := sys->tokenize(text, "\n\t ");
+	if(nf == 0)
+		return (nil, nil);
+	if(nf < 4)
+		return (nil, "too few fields");
+	# f[0] = suffix, f[1] = generic, f[2] = specific, f[3] = encoding
+	f := array[4] of string;
+	for(i := 0; i < len f; i++){
+		f[i] = hd toks;
+		toks = tl toks;
+		if(i > 0 && f[i] == "-")
+			f[i] = "";
+	}
+	if(f[3] == nil && (f[1] == nil || f[2] == nil))
+		return (nil, nil);
+	return (ref Suffix(f[0], f[1], f[2], f[3]), nil);
+}
+
+#
+# classify by file suffix
+#
+# Only the part of name after the last "/" is examined.  Suffixes are peeled
+# off from the right (".gz", then ".tar", ...) and looked up in suffixes; the
+# first entry that supplies a type sets the content type and the first that
+# supplies an encoding sets the content encoding.  Either result may be nil.
+#
+suffixclass(name: string): (ref Content, ref Content)
+{
+	typ: ref Content;
+	enc: ref Content;
+
+	(nil, base) := str->splitstrr(name, "/");
+	if(base != nil)
+		name = base;
+
+	ext: string;
+	for(;;){
+		(name, ext) = suffix(name);	# TO DO: match below is case sensitive
+		if(ext == nil)
+			break;
+		for(sl := suffixes; sl != nil; sl = tl sl){
+			e := hd sl;
+			if(e.suffix != ext)
+				continue;
+			if(typ == nil && e.generic != nil)
+				typ = Content.mk(e.generic, e.specific, nil);
+			if(enc == nil && e.encoding != nil)
+				enc = Content.mk(e.encoding, "", nil);
+			if(typ != nil && enc != nil)
+				break;
+		}
+	}
+	return (typ, enc);
+}
+
+# split s at its last '.': returns (text before the dot, the dot and what
+# follows it), or (s, nil) if s contains no dot
+suffix(s: string): (string, string)
+{
+	i := len s - 1;
+	while(i >= 0){
+		if(s[i] == '.')
+			return (s[0: i], s[i:]);
+		i--;
+	}
+	return (s, nil);
+}
+
+#
+#  classify by initial contents of file
+#
+# Returns (text/plain, nil) if a holds only printable ASCII, the usual
+# white-space controls (\n \r \t \v \f) and well-formed UTF-8; a charset=utf-8
+# parameter is added when at least one multi-byte character was seen.
+# Returns (nil, nil) for anything else, including a UTF-8 sequence that is cut
+# short by the end of a.
+#
+dataclass(a: array of byte): (ref Content, ref Content)
+{
+	utf8 := 0;
+	params: list of (string, string) = nil;
+	for(i := 0; i < len a;){
+		c := int a[i];
+		if(c < 16r80){
+			if(c < 32 && c != '\n' && c != '\r' && c != '\t' && c != '\v' && c != '\f')
+				return (nil, nil);
+			i++;
+			continue;
+		}
+		# Judge validity by the status byte2char reports, not by comparing the
+		# rune with Sys->UTFerror: a correctly encoded character with that code
+		# point is legitimate text.  The length test guarantees progress.
+		(nil, l, ok) := sys->byte2char(a, i);
+		if(!ok || l <= 0)
+			return (nil, nil);
+		utf8 = 1;
+		i += l;
+	}
+	if(utf8)
+		params = ("charset", "utf-8") :: nil;
+	return (Content.mk("text", "plain", params), nil);
+}
+
+# build a Content from its type (generic), subtype (specific) and parameter
+# list; the arguments are stored as given
+Content.mk(generic, specific: string, params: list of (string, string)): ref Content
+{
+	c := ref Content;	
+	c.generic = generic;
+	c.specific = specific;
+	c.params = params;
+	return c;
+}
+
+# return non-zero if me is acceptable according to oks.
+# An empty oks accepts everything.  Otherwise some entry must match both
+# parts of me, compared in lower case: the generic part matches on equality
+# or when the entry has "*"; the specific part matches on equality, when the
+# entry has "*", or when me has no specific part at all.
+Content.check(me: self ref Content, oks: list of ref Content): int
+{
+	if(oks == nil)
+		return 1;
+	wantg := str->tolower(me.generic);
+	wants := str->tolower(me.specific);
+	while(oks != nil){
+		c := hd oks;
+		oks = tl oks;
+		gmatch := c.generic == wantg || c.generic == "*";
+		smatch := wants == nil || c.specific == wants || c.specific == "*";
+		if(gmatch && smatch)
+			return 1;
+	}
+	return 0;
+}
+
+# format c as text: "generic/specific" (just "generic" when there is no
+# specific part) followed by ";name=value" for each parameter, with the
+# value passed through quote
+Content.text(c: self ref Content): string
+{
+	out := c.generic;
+	if(c.specific != nil)
+		out = c.generic+"/"+c.specific;
+	pl := c.params;
+	while(pl != nil){
+		(attr, val) := hd pl;
+		out += sys->sprint(";%s=%s", attr, quote(val));
+		pl = tl pl;
+	}
+	return out;
+}
+
+#
+# should probably be in a Mime or HTTP module
+#
+
+# characters that force a parameter value to be written as a quoted string
+Quotable: con "()<>@,;:\\\"/[]?={} \t";
+
+# return 1 if s contains any character of Quotable, 0 otherwise
+quotable(s: string): int
+{
+	n := len s;
+	i := 0;
+	while(i < n){
+		if(str->in(s[i], Quotable))
+			return 1;
+		i++;
+	}
+	return 0;
+}
+
+# return s in a form usable as a parameter value.
+#  - the empty string becomes "" (two quote characters): an empty value is
+#    not a valid token, and parseparams drops a bare "attr=";
+#  - a string with no Quotable character is returned unchanged;
+#  - anything else is wrapped in double quotes, with a backslash before
+#    every Quotable character.
+quote(s: string): string
+{
+	if(s == nil)
+		return "\"\"";
+	if(!quotable(s))
+		return s;
+	q :=  "\"";
+	for(i := 0; i < len s; i++){
+		if(str->in(s[i], Quotable))
+			q[len q] = '\\';
+		q[len q] = s[i];
+	}
+	q[len q] = '"';
+	return q;
+}
+
+# day names, indexed by Tm.wday in sec2date
+weekdays := array[] of {
+	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+};
+
+# month names, indexed by Tm.mon in sec2date
+months := array[] of {
+	"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+};
+
+# print dates in the format
+# Wkd, DD Mon YYYY HH:MM:SS GMT
+# t is converted with daytime->gmt before formatting
+
+sec2date(t: int): string
+{
+	tm := daytime->gmt(t);
+	wday := weekdays[tm.wday];
+	mon := months[tm.mon];
+	year := tm.year+1900;
+	return sys->sprint("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
+		wday, tm.mday, mon, year, tm.hour, tm.min, tm.sec);
+}
+
+# parse dates of formats
+# Wkd, DD Mon YYYY HH:MM:SS GMT
+# Weekday, DD-Mon-YY HH:MM:SS GMT
+# Wkd Mon ( D|DD) HH:MM:SS YYYY
+# plus anything similar
+#
+# returns the time in seconds since the epoch, or 0 if the date cannot be
+# parsed, its tm.year is below 70, or its zone is not "GMT"
+
+date2sec(date: string): int
+{
+	tm := daytime->string2tm(date);
+	if(tm != nil && tm.year >= 70 && tm.zone == "GMT")
+		return daytime->tm2epoch(tm);
+	return 0;
+}
+
+# return the current time as reported by daytime->now()
+now(): int
+{
+	return daytime->now();
+}
+
+# return the current time formatted by sec2date
+time(): string
+{
+	return sec2date(daytime->now());
+}