google isn't taking the hint
Specifically Google, but also some other crawlers, have started ignoring rel="nofollow" while also understanding just enough JavaScript to try viewing binary files as text.
This commit is contained in:
		
							parent
							
								
									de2c978842
								
							
						
					
					
						commit
						99f63adf58
					
				| @ -40,6 +40,7 @@ from .cfg import flagcats, onedash | ||||
| from .svchub import SvcHub | ||||
| from .util import ( | ||||
|     APPLESAN_TXT, | ||||
|     BAD_BOTS, | ||||
|     DEF_EXP, | ||||
|     DEF_MTE, | ||||
|     DEF_MTH, | ||||
| @ -1244,6 +1245,7 @@ def add_optouts(ap): | ||||
|     ap2.add_argument("--zipmaxt", metavar="TXT", type=u, default="", help="custom errormessage when download size exceeds max (volflag=zipmaxt)") | ||||
|     ap2.add_argument("--zipmaxu", action="store_true", help="authenticated users bypass the zip size limit (volflag=zipmaxu)") | ||||
|     ap2.add_argument("--zip-who", metavar="LVL", type=int, default=3, help="who can download as zip/tar? [\033[32m0\033[0m]=nobody, [\033[32m1\033[0m]=admins, [\033[32m2\033[0m]=authenticated-with-read-access, [\033[32m3\033[0m]=everyone-with-read-access (volflag=zip_who)\n\033[1;31mWARNING:\033[0m if a nested volume has a more restrictive value than a parent volume, then this will be \033[33mignored\033[0m if the download is initiated from the parent, more lenient volume") | ||||
|     ap2.add_argument("--ua-nozip", metavar="PTN", type=u, default=BAD_BOTS, help="regex of user-agents to reject from download-as-zip/tar; disable with [\033[32mno\033[0m] or blank") | ||||
|     ap2.add_argument("--no-zip", action="store_true", help="disable download as zip/tar; same as \033[33m--zip-who=0\033[0m") | ||||
|     ap2.add_argument("--no-tarcmp", action="store_true", help="disable download as compressed tar (?tar=gz, ?tar=bz2, ?tar=xz, ?tar=gz:9, ...)") | ||||
|     ap2.add_argument("--no-lifetime", action="store_true", help="do not allow clients (or server config) to schedule an upload to be deleted after a given time") | ||||
| @ -1434,6 +1436,7 @@ def add_txt(ap): | ||||
|     ap2.add_argument("--exp", action="store_true", help="enable textfile expansion -- replace {{self.ip}} and such; see \033[33m--help-exp\033[0m (volflag=exp)") | ||||
|     ap2.add_argument("--exp-md", metavar="V,V,V", type=u, default=DEF_EXP, help="comma/space-separated list of placeholders to expand in markdown files; add/remove stuff on the default list with +hdr_foo or /vf.scan (volflag=exp_md)") | ||||
|     ap2.add_argument("--exp-lg", metavar="V,V,V", type=u, default=DEF_EXP, help="comma/space-separated list of placeholders to expand in prologue/epilogue files (volflag=exp_lg)") | ||||
|     ap2.add_argument("--ua-nodoc", metavar="PTN", type=u, default=BAD_BOTS, help="regex of user-agents to reject from viewing documents through ?doc=[...]; disable with [\033[32mno\033[0m] or blank") | ||||
| 
 | ||||
| 
 | ||||
| def add_og(ap): | ||||
|  | ||||
| @ -3807,6 +3807,9 @@ class HttpCli(object): | ||||
|             return "download-as-zip/tar is admin-only on this server" | ||||
|         elif lvl <= 2 and self.uname in ("", "*"): | ||||
|             return "you must be authenticated to download-as-zip/tar on this server" | ||||
|         elif self.args.ua_nozip and self.args.ua_nozip.search(self.ua): | ||||
|             t = "this URL contains no valuable information for bots/crawlers" | ||||
|             raise Pebkac(403, t) | ||||
|         return "" | ||||
| 
 | ||||
|     def tx_res(self, req_path: str) -> bool: | ||||
| @ -6291,6 +6294,10 @@ class HttpCli(object): | ||||
| 
 | ||||
|         doc = self.uparam.get("doc") if self.can_read else None | ||||
|         if doc: | ||||
|             zp = self.args.ua_nodoc | ||||
|             if zp and zp.search(self.ua): | ||||
|                 t = "this URL contains no valuable information for bots/crawlers" | ||||
|                 raise Pebkac(403, t) | ||||
|             j2a["docname"] = doc | ||||
|             doctxt = None | ||||
|             dfn = lnames.get(doc.lower()) | ||||
|  | ||||
| @ -769,7 +769,8 @@ class SvcHub(object): | ||||
|                 vs = os.path.expandvars(os.path.expanduser(vs)) | ||||
|                 setattr(al, k, vs) | ||||
| 
 | ||||
|         for k in "dav_ua1 sus_urls nonsus_urls".split(" "): | ||||
|         zs = "dav_ua1 sus_urls nonsus_urls ua_nodoc ua_nozip" | ||||
|         for k in zs.split(" "): | ||||
|             vs = getattr(al, k) | ||||
|             if not vs or vs == "no": | ||||
|                 setattr(al, k, None) | ||||
|  | ||||
| @ -245,6 +245,9 @@ SYMTIME = PY36 and os.utime in os.supports_follow_symlinks | ||||
| 
 | ||||
| META_NOBOTS = '<meta name="robots" content="noindex, nofollow">\n' | ||||
| 
 | ||||
| # smart enough to understand javascript while also ignoring rel="nofollow" | ||||
| BAD_BOTS = r"Barkrowler|bingbot|BLEXBot|Googlebot|GPTBot|PetalBot|SeekportBot|SemrushBot|YandexBot" | ||||
| 
 | ||||
| FFMPEG_URL = "https://www.gyan.dev/ffmpeg/builds/ffmpeg-git-full.7z" | ||||
| 
 | ||||
| URL_PRJ = "https://github.com/9001/copyparty" | ||||
|  | ||||
| @ -135,7 +135,7 @@ class Cfg(Namespace): | ||||
|         ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip" | ||||
|         ka.update(**{k: True for k in ex.split()}) | ||||
| 
 | ||||
|         ex = "ah_cli ah_gen css_browser hist ipu js_browser js_other mime mimes no_forget no_hash no_idx nonsus_urls og_tpl og_ua" | ||||
|         ex = "ah_cli ah_gen css_browser hist ipu js_browser js_other mime mimes no_forget no_hash no_idx nonsus_urls og_tpl og_ua ua_nodoc ua_nozip" | ||||
|         ka.update(**{k: None for k in ex.split()}) | ||||
| 
 | ||||
|         ex = "hash_mt hsortn safe_dedup srch_time u2abort u2j u2sz" | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 ed
						ed