http*: serve embedded resources through resource abstraction

This commit is contained in:
Shiz 2024-09-17 11:34:31 +02:00
parent b484c1b99b
commit 53316a7f36
3 changed files with 165 additions and 21 deletions

View File

@ -68,12 +68,15 @@ from .util import (
get_spd,
guess_mime,
gzip_orig_sz,
gzip_file_orig_sz,
has_resource,
hashcopy,
hidedir,
html_bescape,
html_escape,
humansize,
ipnorm,
load_resource,
loadpy,
log_reloc,
min_ex,
@ -93,6 +96,7 @@ from .util import (
sanitize_vpath,
sendfile_kern,
sendfile_py,
stat_resource,
ub64dec,
ub64enc,
ujoin,
@ -1093,12 +1097,11 @@ class HttpCli(object):
if self.vpath == ".cpr/metrics":
return self.conn.hsrv.metrics.tx(self)
path_base = os.path.join(self.E.mod, "web")
static_path = absreal(os.path.join(path_base, self.vpath[5:]))
static_path = os.path.join("web", self.vpath[5:])
if static_path in self.conn.hsrv.statics:
return self.tx_file(static_path)
return self.tx_res(static_path)
if not static_path.startswith(path_base):
if not undot(static_path).startswith("web"):
t = "malicious user; attempted path traversal [{}] => [{}]"
self.log(t.format(self.vpath, static_path), 1)
self.cbonk(self.conn.hsrv.gmal, self.req, "trav", "path traversal")
@ -3300,6 +3303,129 @@ class HttpCli(object):
return txt
def tx_res(self, req_path: str) -> bool:
    """
    Send one embedded resource to the client.

    Looks for `req_path` and `req_path + ".gz"` through the resource
    helpers (has_resource / stat_resource / load_resource), picks one
    edition, and replies with it; answers 404 when neither edition is
    usable, and 304 when `_chk_lastmod` says the client copy is current.

    req_path: resource path as understood by the resource helpers
    returns: False if the body could not be sent completely, the result
        of `tx_404` when no edition exists, True otherwise
    """
    status = 200
    logmsg = "{:4} {} ".format("", self.req)
    logtail = ""

    editions = {}
    file_ts = 0

    if has_resource(self.E, req_path):
        st = stat_resource(self.E, req_path)
        if st:
            file_ts = max(file_ts, st.st_mtime)
        editions["plain"] = req_path

    gz_path = req_path + ".gz"
    if has_resource(self.E, gz_path):
        st = stat_resource(self.E, gz_path)
        # the freshness check must run against the timestamp collected so
        # far (the plain edition, or 0 if there is none); doing it after
        # folding the gz mtime into file_ts makes it impossible to satisfy
        # and would discard every gz edition that can be stat'ed
        if not st or st.st_mtime > file_ts:
            editions[".gz"] = gz_path
        if st:
            file_ts = max(file_ts, st.st_mtime)

    if not editions:
        return self.tx_404()

    #
    # if-modified

    if file_ts > 0:
        file_lastmod, do_send = self._chk_lastmod(int(file_ts))
        self.out_headers["Last-Modified"] = file_lastmod
        if not do_send:
            status = 304

        if self.can_write:
            self.out_headers["X-Lastmod3"] = str(int(file_ts * 1000))
    else:
        # no timestamp available; cannot validate the client cache
        do_send = True

    #
    # Accept-Encoding and UA decides which edition to send

    decompress = False
    supported_editions = [
        x.strip()
        for x in self.headers.get("accept-encoding", "").lower().split(",")
    ]
    if ".gz" in editions:
        is_compressed = True
        selected_edition = ".gz"

        if "gzip" not in supported_editions:
            decompress = True
        else:
            # NOTE(review): re.match anchors at the start of the UA string;
            # confirm that is what this check intends
            if re.match(r"MSIE [4-6]\.", self.ua) and " SV1" not in self.ua:
                decompress = True

        if not decompress:
            self.out_headers["Content-Encoding"] = "gzip"
    else:
        is_compressed = False
        selected_edition = "plain"

    res_path = editions[selected_edition]
    logmsg += "{} ".format(selected_edition.lstrip("."))

    raw = load_resource(self.E, res_path)
    res = raw
    try:
        if decompress:
            file_sz = gzip_file_orig_sz(raw)
            res = gzip.open(raw)
        else:
            # measure the payload by seeking to the end and back
            raw.seek(0, os.SEEK_END)
            file_sz = raw.tell()
            raw.seek(0, os.SEEK_SET)

        #
        # send reply

        if is_compressed:
            self.out_headers["Cache-Control"] = "max-age=604869"
        else:
            self.permit_caching()

        if "txt" in self.uparam:
            mime = "text/plain; charset={}".format(self.uparam["txt"] or "utf-8")
        elif "mime" in self.uparam:
            mime = str(self.uparam.get("mime"))
        else:
            mime = guess_mime(req_path)

        logmsg += unicode(status) + logtail

        if self.mode == "HEAD" or not do_send:
            if self.do_log:
                self.log(logmsg)

            self.send_headers(length=file_sz, status=status, mime=mime)
            return True

        ret = True
        self.send_headers(length=file_sz, status=status, mime=mime)
        remains = sendfile_py(
            self.log,
            0,
            file_sz,
            res,
            self.s,
            self.args.s_wr_sz,
            self.args.s_wr_slp,
            not self.args.no_poll,
        )
    finally:
        # closing the gzip wrapper does not close the stream it wraps,
        # so both handles are released explicitly
        if res is not raw:
            res.close()
        raw.close()

    if remains > 0:
        logmsg += " \033[31m" + unicode(file_sz - remains) + "\033[0m"
        ret = False

    spd = self._spd(file_sz - remains)
    if self.do_log:
        self.log("{}, {}".format(logmsg, spd))

    return ret
def tx_file(self, req_path: str, ptop: Optional[str] = None) -> bool:
status = 200
logmsg = "{:4} {} ".format("", self.req)
@ -3815,15 +3941,11 @@ class HttpCli(object):
return self.tx_404(True)
tpl = "mde" if "edit2" in self.uparam else "md"
html_path = os.path.join(self.E.mod, "web", "{}.html".format(tpl))
template = self.j2j(tpl)
st = bos.stat(fs_path)
ts_md = st.st_mtime
st = bos.stat(html_path)
ts_html = st.st_mtime
max_sz = 1024 * self.args.txt_max
sz_md = 0
lead = b""
@ -3857,7 +3979,7 @@ class HttpCli(object):
fullfile = html_bescape(fullfile)
sz_md = len(lead) + len(fullfile)
file_ts = int(max(ts_md, ts_html, self.E.t0))
file_ts = int(max(ts_md, self.E.t0))
file_lastmod, do_send = self._chk_lastmod(file_ts)
self.out_headers["Last-Modified"] = file_lastmod
self.out_headers.update(NO_CACHE)
@ -3896,7 +4018,7 @@ class HttpCli(object):
zs = template.render(**targs).encode("utf-8", "replace")
html = zs.split(boundary.encode("utf-8"))
if len(html) != 2:
raise Exception("boundary appears in " + html_path)
raise Exception("boundary appears in " + tpl)
self.send_headers(sz_md + len(html[0]) + len(html[1]), status)

View File

@ -68,13 +68,17 @@ from .util import (
NetMap,
absreal,
build_netmap,
has_resource,
ipnorm,
load_resource,
min_ex,
shut_socket,
spack,
start_log_thrs,
start_stackmon,
stat_resource,
ub64enc,
walk_resources,
)
if TYPE_CHECKING:
@ -91,6 +95,10 @@ if not hasattr(socket, "AF_UNIX"):
setattr(socket, "AF_UNIX", -9001)
def load_jinja2_resource(E: EnvParams, name: str):
    """
    Return the contents of template `name` from the "web" resource folder.

    The resource is opened through `load_resource` with mode "r" and read
    in full; the handle is closed before returning so template loads do
    not leave resources open.
    """
    res = load_resource(E, os.path.join("web", name), "r")
    try:
        return res.read()
    finally:
        res.close()
class HttpSrv(object):
"""
handles incoming connections using HttpConn to process http,
@ -153,7 +161,7 @@ class HttpSrv(object):
self.u2idx_n = 0
env = jinja2.Environment()
env.loader = jinja2.FileSystemLoader(os.path.join(self.E.mod, "web"))
env.loader = jinja2.FunctionLoader(lambda f: load_jinja2_resource(self.E, f))
jn = [
"splash",
"shares",
@ -166,8 +174,7 @@ class HttpSrv(object):
"cf",
]
self.j2 = {x: env.get_template(x + ".html") for x in jn}
zs = os.path.join(self.E.mod, "web", "deps", "prism.js.gz")
self.prism = os.path.exists(zs)
self.prism = has_resource(self.E, os.path.join("web", "deps", "prism.js.gz"))
self.ipa_nm = build_netmap(self.args.ipa)
self.xff_nm = build_netmap(self.args.xff_src)
@ -210,9 +217,9 @@ class HttpSrv(object):
pass
def _build_statics(self) -> None:
for dp, _, df in os.walk(os.path.join(self.E.mod, "web")):
for dp, _, df in walk_resources(self.E, "web"):
for fn in df:
ap = absreal(os.path.join(dp, fn))
ap = os.path.join(dp, fn)
self.statics.add(ap)
if ap.endswith(".gz"):
self.statics.add(ap[:-3])
@ -536,10 +543,20 @@ class HttpSrv(object):
v = self.E.t0
try:
with os.scandir(os.path.join(self.E.mod, "web")) as dh:
for fh in dh:
inf = fh.stat()
# fold the newest mtime of the "web" resource folder, its direct
# subfolders and its direct files into v; entries that stat_resource
# cannot stat are skipped
for (base, dirs, files) in walk_resources(self.E, "web"):
    inf = stat_resource(self.E, base)
    if inf:
        v = max(v, inf.st_mtime)
    for d in dirs:
        inf = stat_resource(self.E, os.path.join(base, d))
        if inf:
            v = max(v, inf.st_mtime)
    for f in files:
        # must join with the loop variable; an undefined name here raises
        # NameError, which the surrounding bare except silently swallows,
        # so no file timestamp would ever be taken into account
        inf = stat_resource(self.E, os.path.join(base, f))
        if inf:
            v = max(v, inf.st_mtime)
    # only do top-level
    break
except:
pass

View File

@ -3417,9 +3417,14 @@ def loadpy(ap: str, hot: bool) -> Any:
def gzip_orig_sz(fn: str) -> int:
with open(fsenc(fn), "rb") as f:
f.seek(-4, 2)
rv = f.read(4)
return sunpack(b"I", rv)[0] # type: ignore
return gzip_file_orig_sz(f)
def gzip_file_orig_sz(f) -> int:
    """
    Return the uncompressed size recorded in the trailer of a gzip stream.

    f: seekable binary file object positioned anywhere in the stream;
        its position is restored before returning, also on error

    The last four bytes of the stream are read and decoded as an unsigned
    32-bit little-endian integer (the byte order the gzip format uses for
    its size field), independent of the host's native byte order.
    """
    start = f.tell()
    try:
        f.seek(-4, 2)
        rv = f.read(4)
    finally:
        f.seek(start, 0)

    return sunpack(b"<I", rv)[0]  # type: ignore
def align_tab(lines: list[str]) -> list[str]: