diff options
Diffstat (limited to 'appl/lib/rfc822.b')
-rw-r--r-- | appl/lib/rfc822.b | 561 |
1 files changed, 561 insertions, 0 deletions
diff --git a/appl/lib/rfc822.b b/appl/lib/rfc822.b new file mode 100644 index 00000000..0f15a585 --- /dev/null +++ b/appl/lib/rfc822.b @@ -0,0 +1,561 @@ +implement RFC822; + +include "sys.m"; + sys: Sys; + +include "bufio.m"; + bufio: Bufio; + Iobuf: import bufio; + +include "rfc822.m"; + +include "string.m"; + str: String; + +include "daytime.m"; + daytime: Daytime; + Tm: import daytime; + +Minrequest: con 512; # more than enough for most requests + +Suffix: adt { + suffix: string; + generic: string; + specific: string; + encoding: string; +}; + +SuffixFile: con "/lib/mimetype"; +mtime := 0; +qid: Sys->Qid; + +suffixes: list of ref Suffix; + +nomod(s: string) +{ + raise sys->sprint("internal: can't load %s: %r", s); +} + +init(b: Bufio) +{ + sys = load Sys Sys->PATH; + bufio = b; + str = load String String->PATH; + if(str == nil) + nomod(String->PATH); + daytime = load Daytime Daytime->PATH; + if(daytime == nil) + nomod(Daytime->PATH); + readsuffixfile(); +} + +readheaders(fd: ref Iobuf, limit: int): array of (string, array of byte) +{ + n := 0; + s := 0; + b := array[Minrequest] of byte; + nline := 0; + lines: list of array of byte; + while((c := fd.getb()) >= 0){ + if(c == '\r'){ + c = fd.getb(); + if(c < 0) + break; + if(c != '\n'){ + fd.ungetb(); + c = '\r'; + } + } + if(n >= len b){ + if(len b >= limit) + return nil; + ab := array[n+512] of byte; + ab[0:] = b; + b = ab; + } + b[n++] = byte c; + if(c == '\n'){ + if(n == 1 || b[n-2] == byte '\n') + break; # empty line + c = fd.getb(); + if(c < 0) + break; + if(c != ' ' && c != '\t'){ # not continued + fd.ungetb(); + lines = b[s: n] :: lines; + nline++; + s = n; + }else + b[n-1] = byte ' '; + } + } + if(n == 0) + return nil; + b = b[0: n]; + if(n != s){ + lines = b[s:n] :: lines; + nline++; + } + a := array[nline] of (string, array of byte); + for(; lines != nil; lines = tl lines){ + b = hd lines; + name := ""; + for(i := 0; i < len b; i++) + if(b[i] == byte ':'){ + name = str->tolower(string b[0:i]); + b = b[i+1:]; + break; + } + a[--nline] = (name, b); + } + return a; +} + +# +# *(";" parameter) used in transfer-extension, media-type and media-range +# parameter = attribute "=" value +# attribute = token +# value = token | quoted-string +# +parseparams(ps: ref Rfclex): list of (string, string) +{ + l: list of (string, string); + do{ + if(ps.lex() != Word) + break; + attr := ps.wordval; + if(ps.lex() != '=' || ps.lex() != Word && ps.tok != QString) + break; + l = (attr, ps.wordval) :: l; + }while(ps.lex() == ';'); + ps.unlex(); + return rev(l); +} + +# +# 1#transfer-coding +# +mimefields(ps: ref Rfclex): list of (string, list of (string, string)) +{ + rf: list of (string, list of (string, string)); + do{ + if(ps.lex() == Word){ + w := ps.wordval; + if(ps.lex() == ';'){ + rf = (w, parseparams(ps)) :: rf; + ps.lex(); + }else + rf = (w, nil) :: rf; + } + }while(ps.tok == ','); + ps.unlex(); + f: list of (string, list of (string, string)); + for(; rf != nil; rf = tl rf) + f = hd rf :: f; + return f; +} + +# #(media-type | (media-range [accept-params])) ; Content-Type and Accept +# +# media-type = type "/" subtype *( ";" parameter ) +# type = token +# subtype = token +# LWS must not be used between type and subtype, nor between attribute and value (in parameter) +# +# media-range = ("*/*" | type "/*" | type "/" subtype ) *(";' parameter) +# accept-params = ";" "q" "=" qvalue *( accept-extension ) +# accept-extension = ";" token [ "=" ( token | quoted-string ) ] +# +# 1#( ( charset | "*" )[ ";" "q" "=" qvalue ] ) ; Accept-Charset +# 1#( codings [ ";" "q" "=" qvalue ] ) ; Accept-Encoding +# 1#( language-range [ ";" "q" "=" qvalue ] ) ; Accept-Language +# +# codings = ( content-coding | "*" ) +# +parsecontent(ps: ref Rfclex, multipart: int, head: list of ref Content): list of ref Content +{ + do{ + if(ps.lex() == Word){ + generic := ps.wordval; + specific := "*"; + if(ps.lex() == '/'){ + if(ps.lex() != Word) + break; + specific = ps.wordval; + if(!multipart && specific != "*") + break; + }else if(multipart) + break; # syntax error + else + ps.unlex(); + params: list of (string, string) = nil; + if(ps.lex() == ';'){ + params = parseparams(ps); + ps.lex(); + } + head = Content.mk(generic, specific, params) :: head; # order reversed, but doesn't matter + } + }while(ps.tok == ','); + ps.unlex(); + return head; +} + +rev(l: list of (string, string)): list of (string, string) +{ + rl: list of (string, string); + for(; l != nil; l = tl l) + rl = hd l :: rl; + return rl; +} + +Rfclex.mk(a: array of byte): ref Rfclex +{ + ps := ref Rfclex; + ps.fd = bufio->aopen(a); + ps.tok = '\n'; + ps.eof = 0; + return ps; +} + +Rfclex.getc(ps: self ref Rfclex): int +{ + c := ps.fd.getb(); + if(c < 0) + ps.eof = 1; + return c; +} + +Rfclex.ungetc(ps: self ref Rfclex) +{ + if(!ps.eof) + ps.fd.ungetb(); +} + +Rfclex.lex(ps: self ref Rfclex): int +{ + if(ps.seen != nil){ + (ps.tok, ps.wordval) = hd ps.seen; + ps.seen = tl ps.seen; + }else + ps.tok = lex1(ps, 0); + return ps.tok; +} + +Rfclex.unlex(ps: self ref Rfclex) +{ + ps.seen = (ps.tok, ps.wordval) :: ps.seen; +} + +Rfclex.skipws(ps: self ref Rfclex): int +{ + return lex1(ps, 1); +} + +# +# rfc 2822/rfc 1521 lexical analyzer +# +lex1(ps: ref Rfclex, skipwhite: int): int +{ + ps.wordval = nil; + while((c := ps.getc()) >= 0){ + case c { + '(' => + level := 1; + while((c = ps.getc()) != Bufio->EOF && c != '\n'){ + if(c == '\\'){ + c = ps.getc(); + if(c == Bufio->EOF) + return '\n'; + continue; + } + if(c == '(') + level++; + else if(c == ')' && --level == 0) + break; + } + ' ' or '\t' or '\r' or 0 => + ; + '\n' => + return '\n'; + ')' or '<' or '>' or '[' or ']' or '@' or '/' or ',' or + ';' or ':' or '?' or '=' => + if(skipwhite){ + ps.ungetc(); + return c; + } + return c; + + '"' => + if(skipwhite){ + ps.ungetc(); + return c; + } + word(ps,"\""); + ps.getc(); # skip the closing quote + return QString; + + * => + ps.ungetc(); + if(skipwhite) + return c; + word(ps,"\"()<>@,;:/[]?={}\r\n \t"); + return Word; + } + } + return '\n'; +} + +# return the rest of an rfc 822 line, not including \r or \n +# do not map to lower case + +Rfclex.line(ps: self ref Rfclex): string +{ + s := ""; + while((c := ps.getc()) != Bufio->EOF && c != '\n' && c != '\r'){ + if(c == '\\'){ + c = ps.getc(); + if(c == Bufio->EOF) + break; + } + s[len s] = c; + } + ps.tok = '\n'; + ps.wordval = s; + return s; +} + +word(ps: ref Rfclex, stop: string) +{ + w := ""; + while((c := ps.getc()) != Bufio->EOF){ + if(c == '\r') + c = ' '; + if(c == '\\'){ + c = ps.getc(); + if(c == Bufio->EOF) + break; + }else if(str->in(c,stop)){ + ps.ungetc(); + break; + } + if(c >= 'A' && c <= 'Z') + c += 'a' - 'A'; + w[len w] = c; + } + ps.wordval = w; +} + +readsuffixfile(): string +{ + iob := bufio->open(SuffixFile, Bufio->OREAD); + if(iob == nil) + return sys->sprint("cannot open %s: %r", SuffixFile); + for(n := 1; (line := iob.gets('\n')) != nil; n++){ + (s, nil) := parsesuffix(line); + if(s != nil) + suffixes = s :: suffixes; + } + return nil; +} + +parsesuffix(line: string): (ref Suffix, string) +{ + (line, nil) = str->splitstrl(line, "#"); + if(line == nil) + return (nil, nil); + (n, slist) := sys->tokenize(line,"\n\t "); + if(n == 0) + return (nil, nil); + if(n < 4) + return (nil, "too few fields"); + s := ref Suffix; + s.suffix = hd slist; + slist = tl slist; + s.generic = hd slist; + if (s.generic == "-") + s.generic = ""; + slist = tl slist; + s.specific = hd slist; + if (s.specific == "-") + s.specific = ""; + slist = tl slist; + s.encoding = hd slist; + if (s.encoding == "-") + s.encoding = ""; + if((s.generic == nil || s.specific == nil) && s.encoding == nil) + return (nil, nil); + return (s, nil); +} + +# +# classify by file suffix +# +suffixclass(name: string): (ref Content, ref Content) +{ + typ, enc: ref Content; + + p := str->splitstrr(name, "/").t1; + if(p != nil) + name = p; + + for(;;){ + (name, p) = suffix(name); # TO DO: match below is case sensitive + if(p == nil) + break; + for(l := suffixes; l != nil; l = tl l){ + s := hd l; + if(p == s.suffix){ + if(s.generic != nil && typ == nil) + typ = Content.mk(s.generic, s.specific, nil); + if(s.encoding != nil && enc == nil) + enc = Content.mk(s.encoding, "", nil); + if(typ != nil && enc != nil) + break; + } + } + } + return (typ, enc); +} + +suffix(s: string): (string, string) +{ + for(n := len s; --n >= 0;) + if(s[n] == '.') + return (s[0: n], s[n:]); + return (s, nil); +} + +# +# classify by initial contents of file +# +dataclass(a: array of byte): (ref Content, ref Content) +{ + utf8 := 0; + for(i := 0; i < len a;){ + c := int a[i]; + if(c < 16r80){ + if(c < 32 && c != '\n' && c != '\r' && c != '\t' && c != '\v' && c != '\f') + return (nil, nil); + i++; + }else{ + utf8 = 1; + (r, l, nil) := sys->byte2char(a, i); + if(r == Sys->UTFerror) + return (nil, nil); + i += l; + } + } + if(utf8) + params := ("charset", "utf-8") :: nil; + return (Content.mk("text", "plain", params), nil); +} + +Content.mk(generic, specific: string, params: list of (string, string)): ref Content +{ + c := ref Content; + c.generic = generic; + c.specific = specific; + c.params = params; + return c; +} + +Content.check(me: self ref Content, oks: list of ref Content): int +{ + if(oks == nil) + return 1; + g := str->tolower(me.generic); + s := str->tolower(me.specific); + for(; oks != nil; oks = tl oks){ + ok := hd oks; + if((ok.generic == g || ok.generic=="*") && + (s == nil || ok.specific == s || ok.specific=="*")) + return 1; + } + return 0; +} + +Content.text(c: self ref Content): string +{ + if((s := c.specific) != nil) + s = c.generic+"/"+s; + else + s = c.generic; + for(l := c.params; l != nil; l = tl l){ + (n, v) := hd l; + s += sys->sprint(";%s=%s", n, quote(v)); + } + return s; +} + +# +# should probably be in a Mime or HTTP module +# + +Quotable: con "()<>@,;:\\\"/[]?={} \t"; + +quotable(s: string): int +{ + for(i := 0; i < len s; i++) + if(str->in(s[i], Quotable)) + return 1; + return 0; +} + +quote(s: string): string +{ + if(!quotable(s)) + return s; + q := "\""; + for(i := 0; i < len s; i++){ + if(str->in(s[i], Quotable)) + q[len q] = '\\'; + q[len q] = s[i]; + } + q[len q] = '"'; + return q; +} + +weekdays := array[] of { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" +}; + +months := array[] of { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" +}; + +# print dates in the format +# Wkd, DD Mon YYYY HH:MM:SS GMT + +sec2date(t: int): string +{ + tm := daytime->gmt(t); + return sys->sprint("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT", + weekdays[tm.wday], tm.mday, months[tm.mon], tm.year+1900, + tm.hour, tm.min, tm.sec); +} + +# parse dates of formats +# Wkd, DD Mon YYYY HH:MM:SS GMT +# Weekday, DD-Mon-YY HH:MM:SS GMT +# Wkd Mon ( D|DD) HH:MM:SS YYYY +# plus anything similar + +date2sec(date: string): int +{ + tm := daytime->string2tm(date); + if(tm == nil || tm.year < 70 || tm.zone != "GMT") + t := 0; + else + t = daytime->tm2epoch(tm); + return t; +} + +now(): int +{ + return daytime->now(); +} + +time(): string +{ + return sec2date(daytime->now()); +} |