Compare commits

...

10 Commits

Author  SHA1        Message                                          Date
ed      3ba0cc20f1  v1.1.10                                          2021-12-17 00:05:17 +01:00
ed      dd28de1796  sendfile: handle eagain                          2021-12-17 00:04:19 +01:00
ed      9eecc9e19a  v1.1.9                                           2021-12-16 22:54:44 +01:00
ed      6530cb6b05  shut socket on tx error                          2021-12-16 22:51:24 +01:00
ed      41ce613379  add multisearch                                  2021-12-12 20:11:07 +01:00
ed      5e2785caba  more aggressively try ffmpeg when mutagen fails  2021-12-11 20:31:04 +01:00
ed      d7cc000976  v1.1.8                                           2021-12-10 02:44:48 +01:00
ed      50d8ff95ae  good stuff                                       2021-12-10 02:21:56 +01:00
ed      b2de1459b6  quick backports to the alternative fuse client   2021-12-10 01:59:45 +01:00
ed      f0ffbea0b2  add breadcrumbs to the textfile tree             2021-12-10 00:44:47 +01:00
11 changed files with 279 additions and 52 deletions

View File

@@ -11,14 +11,18 @@ import re
import os
import sys
import time
import json
import stat
import errno
import struct
import codecs
import platform
import threading
import http.client # py2: httplib
import urllib.parse
from datetime import datetime
from urllib.parse import quote_from_bytes as quote
from urllib.parse import unquote_to_bytes as unquote
try:
import fuse
@@ -38,7 +42,7 @@ except:
mount a copyparty server (local or remote) as a filesystem
usage:
python ./copyparty-fuseb.py -f -o allow_other,auto_unmount,nonempty,url=http://192.168.1.69:3923 /mnt/nas
python ./copyparty-fuseb.py -f -o allow_other,auto_unmount,nonempty,pw=wark,url=http://192.168.1.69:3923 /mnt/nas
dependencies:
sudo apk add fuse-dev python3-dev
@@ -50,6 +54,10 @@ fork of copyparty-fuse.py based on fuse-python which
"""
WINDOWS = sys.platform == "win32"
MACOS = platform.system() == "Darwin"
def threadless_log(msg):
print(msg + "\n", end="")
@@ -93,6 +101,41 @@ def html_dec(txt):
)
def register_wtf8():
def wtf8_enc(text):
return str(text).encode("utf-8", "surrogateescape"), len(text)
def wtf8_dec(binary):
return bytes(binary).decode("utf-8", "surrogateescape"), len(binary)
def wtf8_search(encoding_name):
return codecs.CodecInfo(wtf8_enc, wtf8_dec, name="wtf-8")
codecs.register(wtf8_search)
bad_good = {}
good_bad = {}
def enwin(txt):
return "".join([bad_good.get(x, x) for x in txt])
for bad, good in bad_good.items():
txt = txt.replace(bad, good)
return txt
def dewin(txt):
return "".join([good_bad.get(x, x) for x in txt])
for bad, good in bad_good.items():
txt = txt.replace(good, bad)
return txt
class CacheNode(object):
def __init__(self, tag, data):
self.tag = tag
@@ -115,8 +158,9 @@ class Stat(fuse.Stat):
class Gateway(object):
def __init__(self, base_url):
def __init__(self, base_url, pw):
self.base_url = base_url
self.pw = pw
ui = urllib.parse.urlparse(base_url)
self.web_root = ui.path.strip("/")
@@ -135,8 +179,7 @@ class Gateway(object):
self.conns = {}
def quotep(self, path):
# TODO: mojibake support
path = path.encode("utf-8", "ignore")
path = path.encode("wtf-8")
return quote(path, safe="/")
def getconn(self, tid=None):
@@ -159,20 +202,29 @@ class Gateway(object):
except:
pass
def sendreq(self, *args, **kwargs):
def sendreq(self, *args, **ka):
tid = get_tid()
if self.pw:
ck = "cppwd=" + self.pw
try:
ka["headers"]["Cookie"] = ck
except:
ka["headers"] = {"Cookie": ck}
try:
c = self.getconn(tid)
c.request(*list(args), **kwargs)
c.request(*list(args), **ka)
return c.getresponse()
except:
self.closeconn(tid)
c = self.getconn(tid)
c.request(*list(args), **kwargs)
c.request(*list(args), **ka)
return c.getresponse()
def listdir(self, path):
web_path = self.quotep("/" + "/".join([self.web_root, path])) + "?dots"
if bad_good:
path = dewin(path)
web_path = self.quotep("/" + "/".join([self.web_root, path])) + "?dots&ls"
r = self.sendreq("GET", web_path)
if r.status != 200:
self.closeconn()
@@ -182,9 +234,12 @@ class Gateway(object):
)
)
return self.parse_html(r)
return self.parse_jls(r)
def download_file_range(self, path, ofs1, ofs2):
if bad_good:
path = dewin(path)
web_path = self.quotep("/" + "/".join([self.web_root, path])) + "?raw"
hdr_range = "bytes={}-{}".format(ofs1, ofs2 - 1)
log("downloading {}".format(hdr_range))
@@ -200,40 +255,27 @@ class Gateway(object):
return r.read()
def parse_html(self, datasrc):
ret = []
remainder = b""
ptn = re.compile(
r"^<tr><td>(-|DIR)</td><td><a [^>]+>([^<]+)</a></td><td>([^<]+)</td><td>([^<]+)</td></tr>$"
)
def parse_jls(self, datasrc):
rsp = b""
while True:
buf = remainder + datasrc.read(4096)
# print('[{}]'.format(buf.decode('utf-8')))
buf = datasrc.read(1024 * 32)
if not buf:
break
remainder = b""
endpos = buf.rfind(b"\n")
if endpos >= 0:
remainder = buf[endpos + 1 :]
buf = buf[:endpos]
rsp += buf
lines = buf.decode("utf-8").split("\n")
for line in lines:
m = ptn.match(line)
if not m:
# print(line)
continue
rsp = json.loads(rsp.decode("utf-8"))
ret = []
for statfun, nodes in [
[self.stat_dir, rsp["dirs"]],
[self.stat_file, rsp["files"]],
]:
for n in nodes:
fname = unquote(n["href"].split("?")[0]).rstrip(b"/").decode("wtf-8")
if bad_good:
fname = enwin(fname)
ftype, fname, fsize, fdate = m.groups()
fname = html_dec(fname)
ts = datetime.strptime(fdate, "%Y-%m-%d %H:%M:%S").timestamp()
sz = int(fsize)
if ftype == "-":
ret.append([fname, self.stat_file(ts, sz), 0])
else:
ret.append([fname, self.stat_dir(ts, sz), 0])
ret.append([fname, statfun(n["ts"], n["sz"]), 0])
return ret
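
The client now requests `?dots&ls` and parses a JSON listing instead of scraping HTML. As a rough sketch, the payload parse_jls expects looks like this; the shape is inferred from the code above, and the entries and values below are made up:

```python
# rough sketch of the JSON listing parse_jls consumes (shape inferred from
# the diff above; the entries and values here are made up)
listing = {
    "dirs":  [{"href": "music/",        "ts": 1639700000, "sz": 4096}],
    "files": [{"href": "song.opus?k=x", "ts": 1639700000, "sz": 123456}],
}

for node in listing["dirs"] + listing["files"]:
    name = node["href"].split("?")[0].rstrip("/")  # same cleanup as parse_jls
    print(name, node["ts"], node["sz"])
```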
@@ -262,6 +304,7 @@ class CPPF(Fuse):
Fuse.__init__(self, *args, **kwargs)
self.url = None
self.pw = None
self.dircache = []
self.dircache_mtx = threading.Lock()
@@ -271,7 +314,7 @@ class CPPF(Fuse):
def init2(self):
# TODO figure out how python-fuse wanted this to go
self.gw = Gateway(self.url) # .decode('utf-8'))
self.gw = Gateway(self.url, self.pw) # .decode('utf-8'))
info("up")
def clean_dircache(self):
@@ -536,6 +579,8 @@ class CPPF(Fuse):
def getattr(self, path):
log("getattr [{}]".format(path))
if WINDOWS:
path = enwin(path) # windows occasionally decodes f0xx to xx
path = path.strip("/")
try:
@@ -568,9 +613,25 @@ class CPPF(Fuse):
def main():
time.strptime("19970815", "%Y%m%d") # python#7980
register_wtf8()
if WINDOWS:
os.system("rem")
for ch in '<>:"\\|?*':
# microsoft maps illegal characters to f0xx
# (e000 to f8ff is basic-plane private-use)
bad_good[ch] = chr(ord(ch) + 0xF000)
for n in range(0, 0x100):
# map surrogateescape to another private-use area
bad_good[chr(n + 0xDC00)] = chr(n + 0xF100)
for k, v in bad_good.items():
good_bad[v] = k
server = CPPF()
server.parser.add_option(mountopt="url", metavar="BASE_URL", default=None)
server.parser.add_option(mountopt="pw", metavar="PASSWORD", default=None)
server.parse(values=server, errex=1)
if not server.url or not str(server.url).startswith("http"):
print("\nerror:")
@@ -578,7 +639,7 @@ def main():
print(" need argument: mount-path")
print("example:")
print(
" ./copyparty-fuseb.py -f -o allow_other,auto_unmount,nonempty,url=http://192.168.1.69:3923 /mnt/nas"
" ./copyparty-fuseb.py -f -o allow_other,auto_unmount,nonempty,pw=wark,url=http://192.168.1.69:3923 /mnt/nas"
)
sys.exit(1)
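
For reference, the new pw mount option boils down to attaching the password as a cppwd cookie on every request (see sendreq above). A minimal standalone sketch of an equivalent authenticated request; the host, path and password are placeholders, reusing the example values from the usage string:

```python
# minimal sketch of the request the client makes once pw= is set;
# host, path and password are placeholders (same example values as the usage string)
import http.client

def authed_get(host, path, pw):
    conn = http.client.HTTPConnection(host)
    # the pw mount option ends up as a "cppwd" cookie on every request
    conn.request("GET", path, headers={"Cookie": "cppwd=" + pw})
    return conn.getresponse()

r = authed_get("192.168.1.69:3923", "/?dots&ls", "wark")
print(r.status, len(r.read()))
```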

View File

@@ -1,8 +1,8 @@
# coding: utf-8
VERSION = (1, 1, 7)
VERSION = (1, 1, 10)
CODENAME = "opus"
BUILD_DT = (2021, 12, 7)
BUILD_DT = (2021, 12, 16)
S_VERSION = ".".join(map(str, VERSION))
S_BUILD_DT = "{0:04d}-{1:02d}-{2:02d}".format(*BUILD_DT)

View File

@@ -1367,6 +1367,9 @@ class HttpCli(object):
try:
fs_path = req_path + ext
st = bos.stat(fs_path)
if stat.S_ISDIR(st.st_mode):
continue
file_ts = max(file_ts, st.st_mtime)
editions[ext or "plain"] = [fs_path, st.st_size]
except:
@@ -1512,11 +1515,12 @@ class HttpCli(object):
with open_func(*open_args) as f:
sendfun = sendfile_kern if use_sendfile else sendfile_py
remains = sendfun(
lower, upper, f, self.s, self.args.s_wr_sz, self.args.s_wr_slp
self.log, lower, upper, f, self.s, self.args.s_wr_sz, self.args.s_wr_slp
)
if remains > 0:
logmsg += " \033[31m" + unicode(upper - remains) + "\033[0m"
self.keepalive = False
spd = self._spd((upper - lower) - remains)
if self.do_log:
@@ -1959,6 +1963,13 @@ class HttpCli(object):
fmt = "{{}} {{:{},}} {{}}"
nfmt = "{:,}"
for x in dirs:
n = x["name"] + "/"
if arg == "v":
n = "\033[94m" + n
x["name"] = n
fmt = fmt.format(len(nfmt.format(biggest)))
ret = [
"# {}: {}".format(x, ls[x])

View File

@@ -418,7 +418,8 @@ class MTag(object):
try:
md = mutagen.File(fsenc(abspath), easy=True)
x = md.info.length
if not md.info.length and not md.info.codec:
raise Exception()
except Exception as ex:
return self.get_ffprobe(abspath) if self.can_ffprobe else {}

View File

@@ -77,6 +77,9 @@ class TcpSrv(object):
if "pub" in title_vars and "external" in unicode(desc):
hits.append(("pub", ep))
if "pub" in title_vars or "all" in title_vars:
hits.append(("all", ep))
for var in title_vars:
if var.startswith("ip-") and ep.startswith(var[3:]):
hits.append((var, ep))
@@ -262,8 +265,8 @@ class TcpSrv(object):
return eps
def _set_wintitle(self, vars):
if "pub" not in vars:
vars["pub"] = {"Local-Only": 1}
vars["all"] = vars.get("all", {"Local-Only": 1})
vars["pub"] = vars.get("pub", vars["all"])
vars2 = {}
for k, eps in vars.items():

View File

@@ -1177,7 +1177,7 @@ def hashcopy(fin, fout):
return tlen, hashobj.hexdigest(), digest_b64
def sendfile_py(lower, upper, f, s, bufsz, slp):
def sendfile_py(log, lower, upper, f, s, bufsz, slp):
remains = upper - lower
f.seek(lower)
while remains > 0:
@@ -1197,17 +1197,24 @@ def sendfile_py(lower, upper, f, s, bufsz, slp):
return 0
def sendfile_kern(lower, upper, f, s, bufsz, slp):
def sendfile_kern(log, lower, upper, f, s, bufsz, slp):
out_fd = s.fileno()
in_fd = f.fileno()
ofs = lower
stuck = None
while ofs < upper:
stuck = stuck or time.time()
try:
req = min(2 ** 30, upper - ofs)
select.select([], [out_fd], [], 10)
n = os.sendfile(out_fd, in_fd, ofs, req)
stuck = None
except Exception as ex:
# print("sendfile: " + repr(ex))
d = time.time() - stuck
log("sendfile stuck for {:.3f} sec: {!r}".format(d, ex))
if d < 3600 and ex.errno == 11: # eagain
continue
n = 0
if n <= 0:
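
Condensed into a standalone sketch, the retry logic added above looks roughly like this (simplified; argument handling and logging differ from the project's code):

```python
# condensed sketch of the EAGAIN handling added to sendfile_kern above;
# simplified, not the project's exact code
import errno, os, select, time

def send_range(out_fd, in_fd, ofs, upper, log):
    stuck = None
    while ofs < upper:
        stuck = stuck or time.time()  # when we first started spinning
        try:
            req = min(2 ** 30, upper - ofs)
            select.select([], [out_fd], [], 10)  # wait for the socket to become writable
            n = os.sendfile(out_fd, in_fd, ofs, req)
            stuck = None
        except OSError as ex:
            d = time.time() - stuck
            log("sendfile stuck for {:.3f} sec: {!r}".format(d, ex))
            if d < 3600 and ex.errno == errno.EAGAIN:
                continue  # transient backpressure; retry instead of aborting
            n = 0
        if n <= 0:
            return upper - ofs  # bytes left unsent; the caller logs it and drops keepalive
        ofs += n
    return 0
```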

View File

@@ -1101,7 +1101,7 @@ html.light #doc .line-highlight {
#docul li {
margin: 0;
}
#tree #docul a {
#tree #docul li+li a {
display: block;
}
#seldoc.sel {
@@ -1424,6 +1424,7 @@ html.light .opview input[type="text"] {
border-color: #38d;
}
html.light #u2tab a>span,
html.light #docul .bn a>span,
html.light #files td div span {
color: #000;
}
@@ -2149,6 +2150,7 @@ html.light #u2foot .warn span {
border-color: #d06;
}
#u2tab a>span,
#docul .bn a>span,
#unpost a>span {
font-weight: bold;
font-style: italic;

View File

@@ -2477,7 +2477,7 @@ var showfile = (function () {
}
r.mktree = function () {
var html = ['<li class="bn">list of textfiles in<br />' + esc(get_vpath()) + '</li>'];
var html = ['<li class="bn">list of textfiles in<br />' + linksplit(get_vpath()).join('') + '</li>'];
for (var a = 0; a < r.files.length; a++) {
var file = r.files[a];
html.push('<li><a href="#" hl="' + file.id +
@@ -5010,6 +5010,7 @@ ebi('path').onclick = function (e) {
if (!treectl.spa || !a || !(a = a.getAttribute('href') + '') || !a.endsWith('/'))
return;
thegrid.setvis(true);
treectl.reqls(a, true, true);
return ev(e);
};
@@ -5035,6 +5036,13 @@ ebi('files').onclick = ebi('docul').onclick = function (e) {
showfile.show(noq_href(ebi(tgt.getAttribute('hl'))), tgt.getAttribute('lang'));
return ev(e);
}
tgt = e.target.closest('a');
if (tgt && tgt.closest('li.bn')) {
thegrid.setvis(true);
treectl.goto(tgt.getAttribute('href'), true);
return ev(e);
}
};

View File

@@ -1485,7 +1485,8 @@ function up2k_init(subtle) {
err.indexOf('NotFoundError') !== -1 // macos-firefox permissions
) {
pvis.seth(t.n, 1, 'OS-error');
pvis.seth(t.n, 2, err);
pvis.seth(t.n, 2, err + ' @ ' + car);
console.log('OS-error', reader.error, '@', car);
handled = true;
}
@@ -2113,7 +2114,7 @@ function up2k_init(subtle) {
if (parallel_uploads < 1)
bumpthread(1);
return { "init_deps": init_deps, "set_fsearch": set_fsearch, "ui": pvis }
return { "init_deps": init_deps, "set_fsearch": set_fsearch, "ui": pvis, "st": st, "uc": uc }
}

View File

@@ -23,6 +23,15 @@ point `--css-browser` to one of these by URL:
# utilities
## [`multisearch.html`](multisearch.html)
* takes a list of filenames of youtube rips, grabs the youtube-id from each filename, and searches the server for each of those ids (the underlying search request is sketched below)
* use it by putting it somewhere on the server and opening it as an html page
* also serves as an extendable template for other specific search behaviors
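
Per filename, the page boils down to one search request against the server. A minimal sketch of that call; the endpoint, query syntax and response fields are taken from the page's own script, while the server address and filename are placeholders:

```python
# minimal sketch of the search request multisearch.html performs per filename;
# the server address and the filename are placeholders
import json, re, urllib.request

fname = "some video [dQw4w9WgXcQ].webm"  # youtube-dl / yt-dlp style rip
m = re.search(r"[\[-]([0-9a-zA-Z_-]{11})\]?\.(mp4|webm|mkv)$", fname)
if m:
    body = json.dumps({"q": "name like *{}*".format(m.group(1))}).encode("utf-8")
    req = urllib.request.Request("http://127.0.0.1:3923/?srch", data=body)
    with urllib.request.urlopen(req) as r:
        hits = json.loads(r.read().decode("utf-8"))["hits"]
    for h in hits:
        print(h["sz"], h["rp"])  # size and path, the same fields the page renders
```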
# other stuff
## [`rclone.md`](rclone.md)

docs/multisearch.html (new file, 124 lines)
View File

@@ -0,0 +1,124 @@
<!DOCTYPE html><html lang="en"><head>
<meta charset="utf-8">
<title>multisearch</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<style>
html, body {
margin: 0;
padding: 0;
color: #ddd;
background: #222;
font-family: sans-serif;
}
body {
padding: 1em;
}
a {
color: #fc5;
}
ul {
line-height: 1.5em;
}
code {
color: #fc5;
border: 1px solid #444;
padding: .1em .2em;
font-family: sans-serif, sans-serif;
}
#src {
display: block;
width: calc(100% - 1em);
padding: .5em;
margin: 0;
}
td {
padding-left: 1em;
}
.hit,
.miss {
font-weight: bold;
padding-left: 0;
padding-top: 1em;
}
.hit {color: #af0;}
.miss {color: #f0c;}
.hit:before {content: '✅';}
.miss:before {content: '❌';}
</style></head><body>
<ul>
<li>paste a list of filenames (youtube rips) below and hit search</li>
<li>it will grab the youtube-id from the filenames and search for each id</li>
<li>filenames must be like <code>-YTID.webm</code> (youtube-dl style) or <code>[YTID].webm</code> (ytdlp style)</li>
</ul>
<textarea id="src"></textarea>
<button id="go">search</button>
<div id="res"></div>
<script>
var ebi = document.getElementById.bind(document);
function esc(txt) {
return txt.replace(/[&"<>]/g, function (c) {
return {
'&': '&amp;',
'"': '&quot;',
'<': '&lt;',
'>': '&gt;'
}[c];
});
}
ebi('go').onclick = async function() {
var queries = [];
for (var ln of ebi('src').value.split(/\n/g)) {
// filter the list of input files,
// only keeping youtube videos,
// meaning the filename ends with either
// [YOUTUBEID].EXTENSION or
// -YOUTUBEID.EXTENSION
var m = /[[-]([0-9a-zA-Z_-]{11})\]?\.(mp4|webm|mkv)$/.exec(ln);
if (!m || !(m = m[1]))
continue;
// create a search query for each line: name like *youtubeid*
queries.push([ln, `name like *${m}*`]);
}
var a = 0, html = ['<table>'], hits = [], misses = [];
for (var [fn, q] of queries) {
var r = await fetch('/?srch', {
method: 'POST',
body: JSON.stringify({'q': q})
});
r = await r.json();
var cl, tab2;
if (r.hits.length) {
tab2 = hits;
cl = 'hit';
}
else {
tab2 = misses;
cl = 'miss';
}
var h = `<tr><td class="${cl}" colspan="9">${esc(fn)}</td></tr>`;
tab2.push(h);
html.push(h);
for (var h of r.hits) {
var link = `<a href="/${h.rp}">${esc(decodeURIComponent(h.rp))}</a>`;
html.push(`<tr><td>${h.sz}</td><td>${link}</td></tr>`);
}
ebi('res').innerHTML = `searching, ${++a} / ${queries.length} done, ${hits.length} hits, ${misses.length} miss`;
}
html.push('<tr><td><h1>hits:</h1></td></tr>');
html = html.concat(hits);
html.push('<tr><td><h1>miss:</h1></td></tr>');
html = html.concat(misses);
html.push('</table>');
ebi('res').innerHTML = html.join('\n');
};
</script></body></html>