add option --iobuf (file r/w buffersize):
				
					
				
			* the default (256 KiB) appears optimal in the most popular scenario (linux host with storage on local physical disk, usually NVMe)
			* was previously a mix of 64 and 512 KiB; now the same value is enforced everywhere
			* download-as-tar is now 20% faster with the default value
This commit is contained in:
		
							parent
							
								
									d30ae8453d
								
							
						
					
					
						commit
						2b24c50eb7
					
				| @ -869,6 +869,7 @@ def add_fs(ap): | ||||
|     ap2 = ap.add_argument_group("filesystem options") | ||||
|     rm_re_def = "5/0.1" if ANYWIN else "0/0" | ||||
|     ap2.add_argument("--rm-retry", metavar="T/R", type=u, default=rm_re_def, help="if a file cannot be deleted because it is busy, continue trying for \033[33mT\033[0m seconds, retry every \033[33mR\033[0m seconds; disable with 0/0 (volflag=rm_retry)") | ||||
|     ap2.add_argument("--iobuf", metavar="BYTES", type=int, default=256*1024, help="file I/O buffer-size; if your volumes are on a network drive, try increasing to \033[32m524288\033[0m or even \033[32m4194304\033[0m (and let me know if that improves your performance)") | ||||
| 
 | ||||
| 
 | ||||
| def add_upload(ap): | ||||
|  | ||||
| @ -218,7 +218,7 @@ class FtpFs(AbstractedFS): | ||||
|                 raise FSE("Cannot open existing file for writing") | ||||
| 
 | ||||
|         self.validpath(ap) | ||||
|         return open(fsenc(ap), mode) | ||||
|         return open(fsenc(ap), mode, self.args.iobuf) | ||||
| 
 | ||||
|     def chdir(self, path: str) -> None: | ||||
|         nwd = join(self.cwd, path) | ||||
|  | ||||
| @ -174,7 +174,6 @@ class HttpCli(object): | ||||
|         self.parser: Optional[MultipartParser] = None | ||||
|         # end placeholders | ||||
| 
 | ||||
|         self.bufsz = 1024 * 32 | ||||
|         self.html_head = "" | ||||
| 
 | ||||
|     def log(self, msg: str, c: Union[int, str] = 0) -> None: | ||||
| @ -1641,7 +1640,7 @@ class HttpCli(object): | ||||
|         bos.makedirs(fdir) | ||||
| 
 | ||||
|         open_ka: dict[str, Any] = {"fun": open} | ||||
|         open_a = ["wb", 512 * 1024] | ||||
|         open_a = ["wb", self.args.iobuf] | ||||
| 
 | ||||
|         # user-request || config-force | ||||
|         if ("gz" in vfs.flags or "xz" in vfs.flags) and ( | ||||
| @ -1900,7 +1899,7 @@ class HttpCli(object): | ||||
|         f.seek(ofs) | ||||
|         with open(fp, "wb") as fo: | ||||
|             while nrem: | ||||
|                 buf = f.read(min(nrem, 512 * 1024)) | ||||
|                 buf = f.read(min(nrem, self.args.iobuf)) | ||||
|                 if not buf: | ||||
|                     break | ||||
| 
 | ||||
| @ -2162,7 +2161,7 @@ class HttpCli(object): | ||||
|                     except: | ||||
|                         pass | ||||
| 
 | ||||
|             f = f or open(fsenc(path), "rb+", 512 * 1024) | ||||
|             f = f or open(fsenc(path), "rb+", self.args.iobuf) | ||||
| 
 | ||||
|             try: | ||||
|                 f.seek(cstart[0]) | ||||
| @ -2185,7 +2184,8 @@ class HttpCli(object): | ||||
|                     ) | ||||
|                     ofs = 0 | ||||
|                     while ofs < chunksize: | ||||
|                         bufsz = min(chunksize - ofs, 4 * 1024 * 1024) | ||||
|                         bufsz = max(4 * 1024 * 1024, self.args.iobuf) | ||||
|                         bufsz = min(chunksize - ofs, bufsz) | ||||
|                         f.seek(cstart[0] + ofs) | ||||
|                         buf = f.read(bufsz) | ||||
|                         for wofs in cstart[1:]: | ||||
| @ -2482,7 +2482,7 @@ class HttpCli(object): | ||||
|                         v2 = lim.dfv - lim.dfl | ||||
|                         max_sz = min(v1, v2) if v1 and v2 else v1 or v2 | ||||
| 
 | ||||
|                     with ren_open(tnam, "wb", 512 * 1024, **open_args) as zfw: | ||||
|                     with ren_open(tnam, "wb", self.args.iobuf, **open_args) as zfw: | ||||
|                         f, tnam = zfw["orz"] | ||||
|                         tabspath = os.path.join(fdir, tnam) | ||||
|                         self.log("writing to {}".format(tabspath)) | ||||
| @ -2778,7 +2778,7 @@ class HttpCli(object): | ||||
|         if bos.path.exists(fp): | ||||
|             wunlink(self.log, fp, vfs.flags) | ||||
| 
 | ||||
|         with open(fsenc(fp), "wb", 512 * 1024) as f: | ||||
|         with open(fsenc(fp), "wb", self.args.iobuf) as f: | ||||
|             sz, sha512, _ = hashcopy(p_data, f, self.args.s_wr_slp) | ||||
| 
 | ||||
|         if lim: | ||||
| @ -3010,8 +3010,7 @@ class HttpCli(object): | ||||
|             upper = gzip_orig_sz(fs_path) | ||||
|         else: | ||||
|             open_func = open | ||||
|             # 512 kB is optimal for huge files, use 64k | ||||
|             open_args = [fsenc(fs_path), "rb", 64 * 1024] | ||||
|             open_args = [fsenc(fs_path), "rb", self.args.iobuf] | ||||
|             use_sendfile = ( | ||||
|                 # fmt: off | ||||
|                 not self.tls | ||||
| @ -3146,6 +3145,7 @@ class HttpCli(object): | ||||
| 
 | ||||
|         bgen = packer( | ||||
|             self.log, | ||||
|             self.args, | ||||
|             fgen, | ||||
|             utf8="utf" in uarg, | ||||
|             pre_crc="crc" in uarg, | ||||
| @ -3223,7 +3223,7 @@ class HttpCli(object): | ||||
|         sz_md = 0 | ||||
|         lead = b"" | ||||
|         fullfile = b"" | ||||
|         for buf in yieldfile(fs_path): | ||||
|         for buf in yieldfile(fs_path, self.args.iobuf): | ||||
|             if sz_md < max_sz: | ||||
|                 fullfile += buf | ||||
|             else: | ||||
| @ -3296,7 +3296,7 @@ class HttpCli(object): | ||||
|             if fullfile: | ||||
|                 self.s.sendall(fullfile) | ||||
|             else: | ||||
|                 for buf in yieldfile(fs_path): | ||||
|                 for buf in yieldfile(fs_path, self.args.iobuf): | ||||
|                     self.s.sendall(html_bescape(buf)) | ||||
| 
 | ||||
|             self.s.sendall(html[1]) | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import print_function, unicode_literals | ||||
| 
 | ||||
| import argparse | ||||
| import re | ||||
| import stat | ||||
| import tarfile | ||||
| @ -44,11 +45,12 @@ class StreamTar(StreamArc): | ||||
|     def __init__( | ||||
|         self, | ||||
|         log: "NamedLogger", | ||||
|         args: argparse.Namespace, | ||||
|         fgen: Generator[dict[str, Any], None, None], | ||||
|         cmp: str = "", | ||||
|         **kwargs: Any | ||||
|     ): | ||||
|         super(StreamTar, self).__init__(log, fgen) | ||||
|         super(StreamTar, self).__init__(log, args, fgen) | ||||
| 
 | ||||
|         self.ci = 0 | ||||
|         self.co = 0 | ||||
| @ -126,7 +128,7 @@ class StreamTar(StreamArc): | ||||
|         inf.gid = 0 | ||||
| 
 | ||||
|         self.ci += inf.size | ||||
|         with open(fsenc(src), "rb", 512 * 1024) as fo: | ||||
|         with open(fsenc(src), "rb", self.args.iobuf) as fo: | ||||
|             self.tar.addfile(inf, fo) | ||||
| 
 | ||||
|     def _gen(self) -> None: | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import print_function, unicode_literals | ||||
| 
 | ||||
| import argparse | ||||
| import os | ||||
| import tempfile | ||||
| from datetime import datetime | ||||
| @ -20,10 +21,12 @@ class StreamArc(object): | ||||
|     def __init__( | ||||
|         self, | ||||
|         log: "NamedLogger", | ||||
|         args: argparse.Namespace, | ||||
|         fgen: Generator[dict[str, Any], None, None], | ||||
|         **kwargs: Any | ||||
|     ): | ||||
|         self.log = log | ||||
|         self.args = args | ||||
|         self.fgen = fgen | ||||
|         self.stopped = False | ||||
| 
 | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import print_function, unicode_literals | ||||
| 
 | ||||
| import argparse | ||||
| import calendar | ||||
| import stat | ||||
| import time | ||||
| @ -218,12 +219,13 @@ class StreamZip(StreamArc): | ||||
|     def __init__( | ||||
|         self, | ||||
|         log: "NamedLogger", | ||||
|         args: argparse.Namespace, | ||||
|         fgen: Generator[dict[str, Any], None, None], | ||||
|         utf8: bool = False, | ||||
|         pre_crc: bool = False, | ||||
|         **kwargs: Any | ||||
|     ) -> None: | ||||
|         super(StreamZip, self).__init__(log, fgen) | ||||
|         super(StreamZip, self).__init__(log, args, fgen) | ||||
| 
 | ||||
|         self.utf8 = utf8 | ||||
|         self.pre_crc = pre_crc | ||||
| @ -248,7 +250,7 @@ class StreamZip(StreamArc): | ||||
| 
 | ||||
|         crc = 0 | ||||
|         if self.pre_crc: | ||||
|             for buf in yieldfile(src): | ||||
|             for buf in yieldfile(src, self.args.iobuf): | ||||
|                 crc = zlib.crc32(buf, crc) | ||||
| 
 | ||||
|             crc &= 0xFFFFFFFF | ||||
| @ -257,7 +259,7 @@ class StreamZip(StreamArc): | ||||
|         buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc) | ||||
|         yield self._ct(buf) | ||||
| 
 | ||||
|         for buf in yieldfile(src): | ||||
|         for buf in yieldfile(src, self.args.iobuf): | ||||
|             if not self.pre_crc: | ||||
|                 crc = zlib.crc32(buf, crc) | ||||
| 
 | ||||
|  | ||||
| @ -340,6 +340,9 @@ class Tftpd(object): | ||||
|         if not self.args.tftp_nols and bos.path.isdir(ap): | ||||
|             return self._ls(vpath, "", 0, True) | ||||
| 
 | ||||
|         if not a: | ||||
|             a = [self.args.iobuf] | ||||
| 
 | ||||
|         return open(ap, mode, *a, **ka) | ||||
| 
 | ||||
|     def _mkdir(self, vpath: str, *a) -> None: | ||||
|  | ||||
| @ -3920,7 +3920,7 @@ class Up2k(object): | ||||
|         csz = up2k_chunksize(fsz) | ||||
|         ret = [] | ||||
|         suffix = " MB, {}".format(path) | ||||
|         with open(fsenc(path), "rb", 512 * 1024) as f: | ||||
|         with open(fsenc(path), "rb", self.args.iobuf) as f: | ||||
|             if self.mth and fsz >= 1024 * 512: | ||||
|                 tlt = self.mth.hash(f, fsz, csz, self.pp, prefix, suffix) | ||||
|                 ret = [x[0] for x in tlt] | ||||
|  | ||||
| @ -2361,10 +2361,11 @@ def build_netmap(csv: str): | ||||
|     return NetMap(ips, cidrs, True) | ||||
| 
 | ||||
| 
 | ||||
| def yieldfile(fn: str) -> Generator[bytes, None, None]: | ||||
|     with open(fsenc(fn), "rb", 512 * 1024) as f: | ||||
| def yieldfile(fn: str, bufsz: int) -> Generator[bytes, None, None]: | ||||
|     readsz = min(bufsz, 128 * 1024) | ||||
|     with open(fsenc(fn), "rb", bufsz) as f: | ||||
|         while True: | ||||
|             buf = f.read(128 * 1024) | ||||
|             buf = f.read(readsz) | ||||
|             if not buf: | ||||
|                 break | ||||
| 
 | ||||
|  | ||||
| @ -234,8 +234,9 @@ def u8(gen): | ||||
| 
 | ||||
| 
 | ||||
| def yieldfile(fn): | ||||
|     with open(fn, "rb") as f: | ||||
|         for block in iter(lambda: f.read(64 * 1024), b""): | ||||
|     s = 64 * 1024 | ||||
|     with open(fn, "rb", s * 4) as f: | ||||
|         for block in iter(lambda: f.read(s), b""): | ||||
|             yield block | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -147,6 +147,7 @@ class Cfg(Namespace): | ||||
|             dbd="wal", | ||||
|             fk_salt="a" * 16, | ||||
|             idp_gsep=re.compile("[|:;+,]"), | ||||
|             iobuf=256 * 1024, | ||||
|             lang="eng", | ||||
|             log_badpwd=1, | ||||
|             logout=573, | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 ed
						ed