#!/usr/bin/gawk -f
#
#
#  S I M P L E   W E B   S E R V E R   F R A M E W O R K
#  -----------------------------------------------------
#
# Serves exactly one HTTP request read from standard input and writes
# the response to standard output.
#
# Usage: www-server [-d] [-t] [-l logfile] [base-directory]
#
#   -d          set the debug flag
#   -t          test mode: dump the parsed request instead of serving it
#   -l logfile  append one access-log line per request to logfile
#
# The base directory defaults to /tmp/www.  The client address is taken
# from the environment variable CONNECT_CLIENT.
#
# NOTE(review): the HTML markup inside the format strings of this file
# was reconstructed; confirm the exact tags against a known-good copy.
#

# Remove leading blanks and tabs.
function skipws(string)
{
    sub(/^[ \t]+/, "", string);
    return (string);
}

# Remove trailing blanks, tabs and carriage returns.
function noctrl(string)
{
    sub(/[ \t\r]+$/, "", string);
    return (string);
}

# Wrap string in single quotes for /bin/sh, escaping embedded quotes.
function shellquote(string)
{
    gsub(/'/, "'\\''", string);
    string = "'" string "'";
    return (string);
}

# Turn a client supplied name into something that is safe to use as
# the name of an environment variable: upper case, and every character
# outside [A-Z0-9_] (including `-') replaced by `_'.  The names end up
# unquoted on a shell command line in runcgi().
function safename(name)
{
    name = toupper(name);
    gsub(/[^A-Z0-9_]/, "_", name);
    return (name);
}


#
#  P A R T   1   -   R E A D I N G   T H E   C L I E N T   R E Q U E S T
#  ---------------------------------------------------------------------
#
# Calling readrequest() reads the client's request, fills the global
# arrays REQUEST and var and returns the request's selector (the URI
# without the query string).
#

# Read one line from standard input with all carriage returns removed.
# Returns "\001" on end-of-file or read error.
function readline(    line)
{
    if (getline line > 0) {
        gsub(/\r/, "", line);
        return (line);
    }

    return ("\001");
}

# Decode %XX escapes in value.  Hex digits are accepted in either case.
function decodecgi(value,    result, k, a, b, c, hex)
{
    # index() into this string yields the digit's value; "0" (and any
    # non-hex character) is not found and therefore counts as 0.
    hex = "123456789ABCDEF";

    result = "";
    while ((k = index(value, "%")) > 0) {
        a = toupper(substr(value, k+1, 1));
        b = toupper(substr(value, k+2, 1));
        c = sprintf ("%c", index(hex, a) * 16 + index(hex, b));

        result = result substr(value, 1, k-1) c;
        value = substr(value, k+3);
    }

    result = result value;
    return (result);
}

# Split a query string into var["CGI_<NAME>"] entries.  Repeated
# parameters are joined with ",".  var["QUERY_PARAMETERS"] receives the
# blank separated list of the variable names that were created.
function getcgivars(string,    i, k, n, v, w, x, list)
{
    gsub(/\+/, " ", string);
    n = split(string, x, "&");
    for (i=1; i<=n; i++) {
        if ((k = index(x[i], "=")) > 0) {
            v = "CGI_" safename(substr(x[i], 1, k-1));
            w = decodecgi(substr(x[i], k+1));

            if (v in var)
                var[v] = var[v] "," w;
            else {
                var[v] = w;
                list = list " " v;
            }
        }
    }

    var["QUERY_PARAMETERS"] = substr(list, 2);
    return (0);
}

# Read the request line and the request headers.  Returns the selector
# or "" if no request line could be read.
function readrequest(    k, headername, value, line)
{
    if (($0 = readline()) == "\001") {
        printf ("Broken request.\n") >>STDERR;
        return ("");
    }

    REQUEST["method"]   = var["REQUEST_METHOD"]   = $1;
    REQUEST["protocol"] = var["REQUEST_PROTOCOL"] = $3;

    if ((k = index($2, "?")) == 0)
        REQUEST["selector"] = $2;
    else {
        REQUEST["query"] = var["QUERY_STRING"] = substr($2, k+1);
        REQUEST["selector"] = substr($2, 1, k-1);
        getcgivars(REQUEST["query"]);
    }

    # Headers end at the first empty line (or at end-of-file).
    while ((line = readline()) != "\001" && line != "") {
        if ((k = index(line, ":")) > 0) {
            headername = safename(substr(line, 1, k-1));
            value = skipws(substr(line, k+1));
            var["HTTP_" headername] = value;
        }
    }

    return (REQUEST["selector"]);
}


#
#  P A R T   2   -   O U T P U T   R E L A T E D   F U N C T I O N S
#  -----------------------------------------------------------------
#
# The following functions handle "send output to client"
# tasks.
#

# Write the status line and the response headers.  `type' becomes the
# Content-Type header; all other headers are taken from the array
# `resp' (an empty key and any content-type entry are skipped).
# Records status and type in the global array RESPONSE.
function httpheader(code, message, type, resp,    u, v, w)
{
    printf ("HTTP/1.0 %03d %s\r\n", code, message);
    if (type != "")
        printf ("Content-Type: %s\r\n", type);

    for (w in resp) {
        if (w == ""  ||  tolower(w) == "content-type")
            continue;

        # Beautify header names: capitalise the first letter and
        # every letter that follows a dash.
        u = toupper(substr(w, 1, 1)) tolower(substr(w, 2));
        v = "";
        while (match(u, /-[a-z]/) > 0) {
            v = v substr(u, 1, RSTART) toupper(substr(u, RSTART+1, 1));
            u = substr(u, RSTART+2);
        }

        v = v u;
        printf ("%s: %s\r\n", v, resp[w]);
    }

    printf ("\r\n");

    RESPONSE["status"] = code;
    RESPONSE["content-type"] = type;

    return (0);
}

# Print the HTML preamble; the body tag only if printbody is non-zero.
function starthtml(title, printbody)
{
    printf ("<html>\n");
    printf ("<head>\n");
    printf (" <title>%s</title>\n", title);
    printf ("</head>\n");
    if (printbody != 0)
        printf ("<body>\n");

    return (0);
}

# Close the HTML page opened by starthtml().
function endhtml()
{
    printf ("</body>\n");
    printf ("</html>\n");

    return (0);
}

# Send a complete error response.  If message is empty, code may be of
# the form "NNN text" and is split at the first blank.
function senderror(code, message, resp)
{
    if (message == ""  &&  match(code, / /) > 0) {
        message = substr(code, RSTART+1);
        code = code + 0;
    }

    httpheader(code, message, "text/html", resp);
    starthtml(message, 1);
    printf ("%s %s: %s<br>\n", code, message, REQUEST["selector"]);
    endhtml();

    return (0);
}

# Send a redirect to location.  The message argument is always
# replaced by "item moved".
function sendredirect(code, location, message, resp)
{
    resp["location"] = location;
    message = "item moved";

    httpheader(code, message, "text/html", resp);
    starthtml(message, 1);
    printf ("%s %s: %s<br>\n", code, message, location);
    endhtml();

    return (0);
}

# Test mode output: dump the parsed request and all entries of var.
function printtest(    key, name)
{
    printf ("method: %s<br>\n", REQUEST["method"]);
    printf ("selector: %s<br>\n", REQUEST["selector"]);
    if (REQUEST["query"] != "")
        printf ("query-string: %s<br>\n", REQUEST["query"]);

    printf ("protocol: %s<br>\n", REQUEST["protocol"]);
    printf ("<p>\n");

    if (REQUEST["selector"] != "") {
        # Nothing in this file stores into CGIVAR, so this loop
        # normally prints nothing; the CGI parameters live in var.
        for (key in CGIVAR)
            printf ("%s: %s<br>\n", key, CGIVAR[key]);

        printf ("<p>\n");
    }

    for (name in var) {
        printf ("%s: %s<br>\n", name, var[name]);
        wantp = 1;      # global flag; not read anywhere in this file
    }

    return (0);
}


#
#  P A R T   3   -   S E R V I N G   R E Q U E S T S
#  -------------------------------------------------
#

#
# Remove `/../' and `/./' from the given path.  The result has no
# leading slash; a trailing slash is preserved.
#
function rewritepath(path,    i, k, n, x, y)
{
    n = split(path, x, /\/+/);
    k = 0;
    for (i=1; i<=n; i++) {
        if (x[i] == "") {
            # An empty last component stands for a trailing slash.
            if (i == n)
                y[k++] = "";

            continue;
        }

        if (x[i] == ".")
            continue;
        else if (x[i] == "..") {
            if (k >= 1)
                k--;
        }
        else
            y[k++] = x[i];
    }

    path = y[0];
    for (i=1; i < k; i++)
        path = path "/" y[i];

    return (path);
}

# Detect `.cgi' requests.  Sets SCRIPT_NAME, PATH_INFO and
# PATH_TRANSLATED in var and returns the path with any extra path
# information after the script name removed.
function checkcgi(path)
{
    if (match(path, /\.cgi$/) > 0) {
        var["PATH_INFO"] = "";
        var["SCRIPT_NAME"] = "/" path;
    }
    else if (match(path, /\.cgi\//) > 0) {
        # The match ends on the slash: PATH_INFO starts there, the
        # script name ends just before it.
        var["PATH_INFO"] = substr(path, RSTART + RLENGTH - 1);
        path = substr(path, 1, RSTART + RLENGTH - 2);
        var["SCRIPT_NAME"] = "/" path;
    }

    if (var["SCRIPT_NAME"] != "") {
        var["PATH_TRANSLATED"] = var["DOCUMENT_ROOT"] var["PATH_INFO"];
    }

    return (path);
}

# Run `ls -fld' on path (one or more already shell-quoted filenames)
# and store type, perm, filename and size of the first line of output
# in sbuf.  With gettime != 0 also rawtime, mtime and lastmod.
# Returns 0 on success, 1 if the ls output is not understood, which is
# taken as `does not exist'.
function stat(path, sbuf, gettime,    cmd, line, filename, i, n, x, y, z)
{
    cmd = sprintf ("/bin/ls -fld %s 2>/dev/null", path);
    cmd | getline line;
    close (cmd);

    if ((n = split(line, x, /[ \t]+/)) == 9)
        ;   # ok - "old style" ls
    else if (n == 8  &&  x[6] ~ /-[0-9][0-9]-/)
        ;   # also ok - "modern style"
    else {
        # other output means `does not exist'.
        return (1);
    }

    #
    # Take some data from the `ls' output.
    #
    sbuf["type"] = substr(x[1], 1, 1) == "d"? "dir": "file";
    sbuf["perm"] = x[1];

    gsub(/\/+/, "/", x[n]);
    sbuf["filename"] = filename = x[n];
    sbuf["size"] = x[5];

    if (gettime != 0) {
        #
        # Try to compute the last-modification time.
        #
        if (n == 8) {
            #
            # Yes, "modern style" simplifies things.
            #
            split(x[6], z, "-");
            split(x[7], y, ":");
            line = sprintf ("%04d %02d %02d %02d %02d %02d",
                    z[1], z[2], z[3],
                    y[1], y[2], y[3]);
        }
        else {
            # Map month names to month numbers.
            n = split("jan feb mar apr may jun jul aug sep oct nov dec", y, " ");
            for (i=1; i <= n; i++)
                z[y[i]] = sprintf ("%d", i);

            if (x[8] !~ /:/) {
                # Field 8 is a year, no time of day available.
                line = sprintf ("%04d %02d %02d 00 00 00",
                        x[8], z[tolower(x[6])], x[7]);
            }
            else {
                # Field 8 is a time; use the current year.
                split(x[8], y, ":");
                line = sprintf ("%04d %02d %02d %02d %02d %02d",
                        strftime("%Y", systime()),
                        z[tolower(x[6])], x[7],
                        y[1], y[2], y[3]);
            }
        }

        sbuf["rawtime"] = line;
        sbuf["mtime"]   = mktime(line);
        sbuf["lastmod"] = strftime("%a, %d %b %Y %H:%M:%S %z", sbuf["mtime"]);
    }

    return (0);
}

# Derive the MIME type from the filename's extension (a trailing `.gz'
# is ignored).  `.cgi' files yield the pseudo type "cgi".
function getmimetype(filename,    type, ext)
{
    sub(/^.*\//, "", filename);
    if (match(filename, /\.[^\.]+(\.gz)?/) == 0) {
        # No extension is `text/plain'
        type = "text/plain";
    }
    else {
        ext = substr(filename, RSTART);
        sub(/\.gz$/, "", ext);

        #
        # Set MIME type for some well known extensions.
        #
        if (ext == ".html"  ||  ext == ".htm")
            type = "text/html";
        else if (ext == ".txt"  ||  ext == ".text")
            type = "text/plain";
        else if (ext == ".gif")
            type = "image/gif";
        else if (ext == ".jpg"  ||  ext == ".jpeg")
            type = "image/jpeg";
        else if (ext == ".png")
            type = "image/png";
        else if (ext == ".css")
            type = "text/css";
        else if (ext == ".js")
            type = "text/javascript";
        else if (ext == ".cgi")
            type = "cgi";
        else
            type = "application/octet-stream";
    }

    return (type);
}

# Validate path and look it up below basedir.  Fills sbuf and returns
# "file" or "dir" on success, otherwise a numeric HTTP status
# (302, 403 or 404).
function checkfile(basedir, path, sbuf,    i, n, filename, x)
{
    sbuf["path"] = path;

    #
    # Check if path refers to a dot-file and deny access.
    #
    if (path ~ /^\./  ||  path ~ /\/\./)
        return (403);

    #
    # Check if path contains any invalid characters.  This also keeps
    # quotes out of the shell commands built from the path.
    #
    if (path == "")
        ;   # Refers to the "homepage".
    else if (path !~ /^[-_a-zA-Z0-9,+\.\/]+$/)
        return (403);

    #
    # As far as we know the request is valid.  Does the requested
    # item exist and, if yes, what is it?
    #
    if (path != ""  &&  path !~ /\/$/)
        filename = sprintf ("'%s/%s'", basedir, path);
    else {
        #
        # If the request points to a directory (trailing slash)
        # we have to look for a list of files.  The directory itself
        # (as `dir/.') goes last, so it is only reported by stat()
        # when none of the default files exists.
        #
        n = split("index.html index.htm", x, " ");
        for (i=1; i <= n; i++)
            filename = filename " " sprintf ("'%s/%s/%s'", basedir, path, x[i]);

        filename = substr(filename, 2) " " sprintf("'%s/%s/.'", basedir, path);
    }

    if (stat(filename, sbuf, 1) != 0)
        return (404);   # File not found.

    if (sbuf["type"] == "dir") {
        #
        # If the filename has a trailing dot it's a directory
        # without default file and the request was done with
        # a trailing slash.  If not redirect.
        #
        if (match(sbuf["filename"], /\/\.$/) == 0) {
            sbuf["location"] = "/" path "/";
            return (302);
        }
    }

    if (sbuf["type"] == "file")
        sbuf["mimetype"] = getmimetype(sbuf["filename"]);

    if (sbuf["mimetype"] == "cgi")
        sbuf["script"] = sbuf["filename"];

    return (sbuf["type"]);
}

# Build an HTML listing of directory dir (shown as path) from the
# output of `ls -l'.  Dot-files are omitted.  Returns the HTML text.
function listdir(dir, path,    i, n, cmd, line, x, filename, data, pad)
{
    # NOTE(review): the column width is an assumption; the original
    # padding string is not recoverable.
    pad = sprintf ("%30s", "");

    data = sprintf ("<pre>\n%s:\n", path);
    data = data sprintf ("<a href=\"..\">..</a>\n");

    cmd = sprintf ("/bin/ls -l '%s'", dir);
    while ((cmd | getline line) > 0) {
        # Skips the "total" line and anything else that is not a
        # full nine column entry.
        if (split(line, x, /[ \t]+/) < 9)
            continue;

        filename = x[9];
        if (filename ~ /^\./)
            continue;

        if (substr(x[1], 1, 1) == "d")
            filename = filename "/";

        line = sprintf ("<a href=\"%s\">%s</a>", filename, filename);
        line = line substr(pad, length(filename));
        line = line sprintf ("%10d %s %02d %s", x[5], x[6], x[7], x[8]);

        data = data line "\n";
    }

    close (cmd);

    data = data "</pre>\n";
    return (data);
}

# Run the CGI script with all entries of var passed as environment
# variables.  Response headers go into resp (lower case keys), the
# body into sbuf["data"].  Returns the status: the script's Status
# header if present, 200 if the script sent other headers only, or
# "500 Server error" if it produced no header at all.
function runcgi(script, sbuf, resp,    status, cmd, line, v, x, data, gothdr)
{
    status = 0;
    gothdr = 0;

    #
    # Construct the shell command, pass parameters as environment
    # variables.  Names go unquoted onto the command line, so
    # anything that is not a plain identifier is left out.
    #
    cmd = "/usr/bin/env";
    for (v in var) {
        if (v !~ /^[A-Za-z_][A-Za-z0-9_]*$/)
            continue;

        cmd = cmd " " v "=" shellquote(var[v]);
    }

    cmd = cmd " " shellquote(script) " 2>/dev/null";

    # Header section: ends at the first empty line.
    while ((cmd | getline line) > 0) {
        sub(/\r$/, "", line);
        if (line == "")
            break;

        if (match(line, /^([^:]+): (.*)$/, x) == 0)
            continue;

        gothdr = 1;
        if (tolower(x[1]) == "status")
            status = x[2] + 0;
        else
            resp[tolower(x[1])] = x[2];
    }

    # Body: keep the original record terminators.
    while ((cmd | getline line) > 0)
        data = data line RT;

    close (cmd);

    sbuf["data"] = data;
    if (status == 0)
        status = (gothdr != 0)? 200: "500 Server error";

    return (status);
}

# Return the next command line argument and blank it in ARGV.  Exits
# with an error message naming `par' if there is none left.
function nextarg(par,    arg)
{
    if (argi >= ARGC) {
        printf ("%s: missing argument: %s\n", program, par) >>STDERR;
        exit (1);
    }

    arg = ARGV[argi];
    ARGV[argi++] = "";

    return (arg);
}


BEGIN {
    program = "www-server";
    STDERR = "/dev/stderr";

    #
    # Parse the command line options.
    #
    argi = 1;
    while (argi < ARGC  &&  substr(ARGV[argi], 1, 1) == "-") {
        options = nextarg("option");
        if (options == "--")
            break;

        for (i = 2; i<=length(options); i++) {
            c = substr(options, i, 1);
            if (c == "d")
                debug = 1;
            else if (c == "l")
                logfile = nextarg("logfile");
            else if (c == "t")
                testmode = 1;
            else {
                printf ("%s: unknown option: -%s\n", program, c) >>STDERR;
                exit (1);
            }
        }
    }

    basedir = (argi >= ARGC)? "/tmp/www": nextarg("base directory");
    var["DOCUMENT_ROOT"] = basedir;
    if ((var["REMOTE_ADDR"] = ENVIRON["CONNECT_CLIENT"]) == "")
        var["REMOTE_ADDR"] = "-";

    #
    # Read and normalise the request.
    #
    if ((path = readrequest()) == "")
        exit (1);

    path = rewritepath(path);
    path = checkcgi(path);
    if (testmode != 0) {
        printtest();
        exit (0);
    }

    #
    # A numeric result from checkfile() is an HTTP status code.
    #
    type = checkfile(basedir, path, sbuf);
    if (type+0 > 0) {
        if (type == 302)
            sendredirect(302, sbuf["location"]);
        else if (type == 403)
            senderror(type, "Access denied");
        else if (type == 404)
            senderror(type, "Not found");
        else
            senderror(500, "Server error.");

        sbuf["size"] = 0;
        sbuf["mimetype"] = "text/html";
        exit (0);
    }

    status = 200;
    resp[""] = "";
    if (sbuf["type"] == "dir") {
        p = sbuf["filename"];
        sbuf["data"] = listdir(p, "/" path);
        resp["content-type"] = "text/html";
        resp["content-length"] = length(sbuf["data"]);

        # A filename of "." means: send sbuf["data"], not a file.
        sbuf["filename"] = ".";
    }
    else if (sbuf["mimetype"] == "cgi") {
        script = sbuf["filename"];
        status = runcgi(script, sbuf, resp);
        resp["content-length"] = length(sbuf["data"]);
        sbuf["filename"] = ".";
    }
    else {
        resp["content-type"] = sbuf["mimetype"];
        resp["content-length"] = sbuf["size"];
        resp["last-modified"] = sbuf["lastmod"];
    }

    if (status+0 != 200) {
        httpheader(status, "", "text/html", resp);
        exit (0);
    }

    httpheader(status, "OK", resp["content-type"], resp);
    if ((filename = sbuf["filename"]) == ".") {
        # No trailing newline: it would not be covered by the
        # Content-Length computed above.
        printf ("%s", sbuf["data"]);
    }
    else {
        # Copy the file record by record, keeping the terminators.
        while ((getline buffer < filename) > 0)
            printf ("%s%s", buffer, RT);
    }

    exit (0);
}

END {
    #
    # Append one access-log line if a logfile was given.
    #
    if (logfile != "") {
        url = REQUEST["selector"];
        if (var["QUERY_STRING"] != "")
            url = url "?" var["QUERY_STRING"];

        printf ("%s - - [%s] \"%s %s %s\" %d %d %s\n",
                var["REMOTE_ADDR"],
                strftime("%d/%b/%Y:%H:%M:%S %z", systime()),
                var["REQUEST_METHOD"], url, var["REQUEST_PROTOCOL"],
                RESPONSE["status"], resp["content-length"],
                resp["content-type"]) >>logfile;
    }

    exit (0);
}