misc;
* scripts: add log repacker * bench/filehash: msys support + add more stats
This commit is contained in:
		
							parent
							
								
									143f72fe36
								
							
						
					
					
						commit
						dee0950f74
					
				| @ -12,6 +12,11 @@ set -euo pipefail | |||||||
| # | # | ||||||
| # can be adjusted with --hash-mt (but alpine caps out at 5) | # can be adjusted with --hash-mt (but alpine caps out at 5) | ||||||
| 
 | 
 | ||||||
|  | fsize=256 | ||||||
|  | nfiles=128 | ||||||
|  | pybin=$(command -v python3 || command -v python) | ||||||
|  | #pybin=~/.pyenv/versions/nogil-3.9.10-2/bin/python3 | ||||||
|  | 
 | ||||||
| [ $# -ge 1 ] || { | [ $# -ge 1 ] || { | ||||||
| 	echo 'need arg 1: path to copyparty-sfx.py' | 	echo 'need arg 1: path to copyparty-sfx.py' | ||||||
| 	echo ' (remaining args will be passed on to copyparty,' | 	echo ' (remaining args will be passed on to copyparty,' | ||||||
| @ -22,6 +27,8 @@ sfx="$1" | |||||||
| shift | shift | ||||||
| sfx="$(realpath "$sfx" || readlink -e "$sfx" || echo "$sfx")" | sfx="$(realpath "$sfx" || readlink -e "$sfx" || echo "$sfx")" | ||||||
| awk=$(command -v gawk || command -v awk) | awk=$(command -v gawk || command -v awk) | ||||||
|  | uname -s | grep -E MSYS && win=1 || win= | ||||||
|  | totalsize=$((fsize*nfiles)) | ||||||
| 
 | 
 | ||||||
| # try to use /dev/shm to avoid hitting filesystems at all, | # try to use /dev/shm to avoid hitting filesystems at all, | ||||||
| # otherwise fallback to mktemp which probably uses /tmp | # otherwise fallback to mktemp which probably uses /tmp | ||||||
| @ -30,20 +37,24 @@ mkdir $td || td=$(mktemp -d) | |||||||
| trap "rm -rf $td" INT TERM EXIT | trap "rm -rf $td" INT TERM EXIT | ||||||
| cd $td | cd $td | ||||||
| 
 | 
 | ||||||
| echo creating 256 MiB testfile in $td | echo creating $fsize MiB testfile in $td | ||||||
| head -c $((1024*1024*256)) /dev/urandom > 1 | sz=$((1024*1024*fsize)) | ||||||
|  | head -c $sz /dev/zero | openssl enc -aes-256-ctr -iter 1 -pass pass:k -nosalt 2>/dev/null >1 || true | ||||||
|  | wc -c 1 | awk '$1=='$sz'{r=1}END{exit 1-r}' || head -c $sz /dev/urandom >1 | ||||||
| 
 | 
 | ||||||
| echo creating 127 symlinks to it | echo creating $((nfiles-1)) symlinks to it | ||||||
| for n in $(seq 2 128); do ln -s 1 $n; done | for n in $(seq 2 $nfiles); do MSYS=winsymlinks:nativestrict ln -s 1 $n; done | ||||||
| 
 | 
 | ||||||
| echo warming up cache | echo warming up cache | ||||||
| cat 1 >/dev/null | cat 1 >/dev/null | ||||||
| 
 | 
 | ||||||
| echo ok lets go | echo ok lets go | ||||||
| python3 "$sfx" -p39204 -e2dsa --dbd=yolo --exit=idx -lo=t -q "$@" | $pybin "$sfx" -p39204 -e2dsa --dbd=yolo --exit=idx -lo=t -q "$@" && err= || err=$? | ||||||
|  | [ $win ] && [ $err = 15 ] && err=  # sigterm doesn't hook on windows, ah whatever | ||||||
|  | [ $err ] && echo ERROR $err && exit $err | ||||||
| 
 | 
 | ||||||
| echo and the results are... | echo and the results are... | ||||||
| LC_ALL=C $awk '/1 volumes in / {s=$(NF-1); printf "speed: %.1f MiB/s  (time=%.2fs)\n", 256*128/s, s}' <t | LC_ALL=C $awk '/1 volumes in / {s=$(NF-1); printf "speed: %.1f MiB/s  (time=%.2fs)\n", '$totalsize'/s, s}' <t | ||||||
| 
 | 
 | ||||||
| echo deleting $td and exiting | echo deleting $td and exiting | ||||||
| 
 | 
 | ||||||
| @ -52,6 +63,8 @@ echo deleting $td and exiting | |||||||
| 
 | 
 | ||||||
| # MiB/s @ cpu or device  (copyparty, pythonver, distro/os)  // comment | # MiB/s @ cpu or device  (copyparty, pythonver, distro/os)  // comment | ||||||
| 
 | 
 | ||||||
|  | #  3887 @ Ryzen 5 4500U  (cpp 1.9.5, nogil 3.9, fedora 39)  // --hash-mt=6; laptop | ||||||
|  | #  3732 @ Ryzen 5 4500U  (cpp 1.9.5, py 3.12.1, fedora 39)  // --hash-mt=6; laptop | ||||||
| #  3608 @ Ryzen 5 4500U  (cpp 1.9.5, py 3.11.5, fedora 38)  // --hash-mt=6; laptop | #  3608 @ Ryzen 5 4500U  (cpp 1.9.5, py 3.11.5, fedora 38)  // --hash-mt=6; laptop | ||||||
| #  2726 @ Ryzen 5 4500U  (cpp 1.9.5, py 3.11.5, fedora 38)  // --hash-mt=4 (old-default) | #  2726 @ Ryzen 5 4500U  (cpp 1.9.5, py 3.11.5, fedora 38)  // --hash-mt=4 (old-default) | ||||||
| #  2202 @ Ryzen 5 4500U  (cpp 1.9.5, py 3.11.5, docker-alpine 3.18.3) ??? alpine slow | #  2202 @ Ryzen 5 4500U  (cpp 1.9.5, py 3.11.5, docker-alpine 3.18.3) ??? alpine slow | ||||||
| @ -62,6 +75,10 @@ echo deleting $td and exiting | |||||||
| #  5544 @ Intel i5-12500 (cpp 1.9.5, py 3.11.2, debian 12.0)  // --hash-mt=12; desktop | #  5544 @ Intel i5-12500 (cpp 1.9.5, py 3.11.2, debian 12.0)  // --hash-mt=12; desktop | ||||||
| #  5197 @ Ryzen 7 3700X  (cpp 1.9.5, py 3.9.18, freebsd 13.2)  // --hash-mt=8; 2u server | #  5197 @ Ryzen 7 3700X  (cpp 1.9.5, py 3.9.18, freebsd 13.2)  // --hash-mt=8; 2u server | ||||||
| #  4551 @ mbp 2020 m1    (cpp 1.9.5, py 3.11.7, macos 14.2.1) | #  4551 @ mbp 2020 m1    (cpp 1.9.5, py 3.11.7, macos 14.2.1) | ||||||
|  | #  4190 @ Ryzen 7 5800X  (cpp 1.9.5, py 3.11.6, fedora 37)  // --hash-mt=8 (vbox-VM on win10-17763.4974) | ||||||
|  | #  3028 @ Ryzen 7 5800X  (cpp 1.9.5, py 3.11.6, fedora 37)  // --hash-mt=5 (vbox-VM on win10-17763.4974) | ||||||
|  | #  2629 @ Ryzen 7 5800X  (cpp 1.9.5, py 3.11.7, win10-ltsc-1809-17763.4974)  // --hash-mt=5 (default) | ||||||
|  | #  2576 @ Ryzen 7 5800X  (cpp 1.9.5, py 3.11.7, win10-ltsc-1809-17763.4974)  // --hash-mt=8 (hello??) | ||||||
| #  2606 @ Ryzen 7 3700X  (cpp 1.9.5, py 3.9.18, freebsd 13.2)  // --hash-mt=4 (old-default) | #  2606 @ Ryzen 7 3700X  (cpp 1.9.5, py 3.9.18, freebsd 13.2)  // --hash-mt=4 (old-default) | ||||||
| #  1436 @ Ryzen 5 5500U  (cpp 1.9.5, py 3.11.4, alpine 3.18.3)  // nuc | #  1436 @ Ryzen 5 5500U  (cpp 1.9.5, py 3.11.4, alpine 3.18.3)  // nuc | ||||||
| #  1065 @ Pixel 7        (cpp 1.9.5, py 3.11.5, termux 2023-09) | #  1065 @ Pixel 7        (cpp 1.9.5, py 3.11.5, termux 2023-09) | ||||||
|  | |||||||
							
								
								
									
										73
									
								
								scripts/logpack.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										73
									
								
								scripts/logpack.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,73 @@ | |||||||
|  | #!/bin/bash | ||||||
|  | set -e | ||||||
|  | 
 | ||||||
|  | # recompress logs so they decompress faster + save some space; | ||||||
|  | # * will not recurse into subfolders | ||||||
|  | # * each file in current folder gets recompressed to zstd; input file is DELETED | ||||||
|  | # * any xz-compressed logfiles are decompressed before converting to zstd | ||||||
|  | # * SHOULD ignore and skip files which are currently open; SHOULD be safe to run while copyparty is running | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
# for files larger than $cutoff, compress with `zstd -T0`
# (otherwise do several files in parallel (scales better))
cutoff=400M


# osx support:
# port install findutils gsed coreutils
#
# every GNU tool gets its own shim, so a partial install still helps;
# gnproc (also from coreutils) is covered because the parallel pass
# below calls nproc
if command -v gfind >/dev/null; then
    find() { gfind "$@"; }
fi
if command -v gsed >/dev/null; then
    sed() { gsed "$@"; }
fi
if command -v gsort >/dev/null; then
    sort() { gsort "$@"; }
fi
if command -v gnproc >/dev/null; then
    nproc() { gnproc "$@"; }
fi
|  | 
 | ||||||
# recompress one logfile to zstd, verify the result, then delete the input
#
# arguments:
#   $1  thread count handed to zstd as -T<n>
#   $2  path of the file to recompress
#
# the input is only removed after the new archive decompresses to exactly
# the same bytes as the input; every skip / failure prints a message and
# returns without touching the input
packfun() {
    local jobs=$1 fn="$2" of
    local -a cmd

    # anything with .zst in its name is already done (or a *.zst.BAD leftover)
    case "$fn" in
        *.zst*) return ;;
    esac

    # only a trailing .xz / .txt is swapped for .zst; other names get .zst appended
    case "$fn" in
        *.xz)  of="${fn%.xz}.zst" ;;
        *.txt) of="${fn%.txt}.zst" ;;
        *)     of="$fn.zst" ;;
    esac

    if [ -e "$of" ]; then
        echo "SKIP: output file exists: $of"
        return
    fi

    # lsof output is shown on purpose so the user can see who holds the file
    if lsof -- "$fn" 2>/dev/null | grep -E ..; then
        printf "SKIP: file in use: %s\n\n" "$fn"
        return
    fi

    # determine by header; old copyparty versions would produce xz output without .xz names
    if head -c3 -- "$fn" | grep -qF 7z; then
        cmd=(xz -dkc)
    else
        cmd=(cat)
    fi

    printf '<%s> T%d: %s\n' "${cmd[*]}" "$jobs" "$of"

    # dry-run the reader first so a corrupt input never produces an output file
    "${cmd[@]}" <"$fn" >/dev/null || {
        echo "ERROR: uncompress failed: $fn"
        return
    }

    "${cmd[@]}" <"$fn" | zstd --long -19 -T"$jobs" >"$of"
    touch -r "$fn" -- "$of"  # carry over the input's timestamp

    # byte-for-byte roundtrip check before the input is deleted
    cmp <("${cmd[@]}" <"$fn") <(zstd -d <"$of") || {
        echo "ERROR: data mismatch: $of"
        mv -- "$of" "$of.BAD"
        return
    }
    rm -- "$fn"
}
|  | 
 | ||||||
# do small files in parallel first (in descending size);
# each file can use 4 threads in case the cutoff is poor
#
# any arguments given to this script are passed on to xargs
export -f packfun
export -f sed 2>/dev/null || true  # only exists when the gsed shim is active
find . -maxdepth 1 -type f -size "-$cutoff" -printf '%s %p\n' |
sort -nr | sed -r 's`[^ ]+ ``; s`^\./``' | tr '\n' '\0' |
xargs "$@" -0 -I{} -P"$(nproc)" bash -c 'packfun 4 "$@"' _ {}

# then the big ones, letting each file use the whole cpu;
# only regular files, same as the `-type f` pass above -- this also
# skips subfolders, symlinks, and the literal '*' of an empty folder
for f in *; do
    if [ ! -f "$f" ] || [ -L "$f" ]; then
        continue
    fi
    packfun 0 "$f"
done
		Loading…
	
		Reference in New Issue
	
	Block a user
	 ed
						ed