Switch to internal defused XML implementation
This commit is contained in:
		
							parent
							
								
									73eed49b23
								
							
						
					
					
						commit
						988cccd2b8
					
				| @ -2274,7 +2274,6 @@ force-enable features with known issues on your OS/env  by setting any of the fo | ||||
| |-----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------| | ||||
| | `PRTY_FORCE_MP`             | force-enable multiprocessing (real multithreading) on MacOS and other broken platforms                                                                              | | ||||
| | `PRTY_FORCE_MAGIC`          | use [magic](https://pypi.org/project/python-magic/) on Windows (you will segfault)                                                                                  | | ||||
| | `PRTY_ALLOW_INSECURE_EXPAT` | allow using expat versions (bundled with python) that are vulnerable to xml attacks, [see the python docs](https://docs.python.org/3/library/xml.html#xml-security) | | ||||
| 
 | ||||
| 
 | ||||
| # packages | ||||
|  | ||||
| @ -65,6 +65,9 @@ DXMLParser = _DXMLParser | ||||
| 
 | ||||
| 
 | ||||
def parse_xml(txt: str) -> ET.Element:
    """
    Turn XML text into an xml.etree.ElementTree.Element via the defused parser.

    The whole document is fed to a fresh DXMLParser instance in one go, and
    the object the parser yields from close() is returned to the caller.
    """
    defused = DXMLParser()
    defused.feed(txt)
    root = defused.close()
    return root  # type: ignore
|  | ||||
| @ -60,23 +60,8 @@ def have_ff(scmd: str) -> bool: | ||||
|     else: | ||||
|         return bool(shutil.which(scmd)) | ||||
| 
 | ||||
def expat_is_secure(version: "str | None" = None) -> bool:
    """
    Report whether an Expat release is new enough to carry the XML DoS fixes.

    From the python xml docs:

    An attacker can abuse XML features to carry out denial of service attacks, access local files, generate network connections to other machines, or circumvent firewalls.
    Expat versions lower than 2.6.0 may be vulnerable to “billion laughs”, “quadratic blowup” and “large tokens”. Python may be vulnerable if it uses such older versions of Expat as a system-provided library. Check pyexpat.EXPAT_VERSION.

    Args:
        version: A version string shaped like pyexpat.EXPAT_VERSION, for
            example "expat_2.7.1". When omitted, the version reported by the
            pyexpat module of the running interpreter is checked.

    Returns:
        True when the version is 2.6 or newer. False when it is older, or when
        no "major.minor" number can be found in the string.
    """
    import re

    if version is None:
        import pyexpat

        version = pyexpat.EXPAT_VERSION

    # Find "major.minor" by pattern instead of slicing at a fixed offset, so
    # suffixes ("2.6.0-rc1") or a missing patch component cannot raise
    # ValueError; this function runs at import time in its caller.
    found = re.search(r"(\d+)\.(\d+)", version)
    if found is None:
        # Unparseable version: treat it as insecure rather than guessing.
        return False

    major, minor = (int(part) for part in found.groups())
    return (major, minor) >= (2, 6)
| 
 | ||||
| HAVE_FFMPEG = not os.environ.get("PRTY_NO_FFMPEG") and have_ff("ffmpeg") | ||||
| HAVE_FFPROBE = not os.environ.get("PRTY_NO_FFPROBE") and have_ff("ffprobe") | ||||
| HAVE_SECURE_EXPAT = os.environ.get("PRTY_ALLOW_INSECURE_EXPAT") or expat_is_secure() | ||||
| 
 | ||||
| CBZ_PICS = set("png jpg jpeg gif bmp tga tif tiff webp avif".split()) | ||||
| CBZ_01 = re.compile(r"(^|[^0-9v])0+[01]\b") | ||||
| @ -191,8 +176,7 @@ def au_unpk( | ||||
|             fi = zf.open(using) | ||||
| 
 | ||||
|         elif pk == "epub": | ||||
|             if HAVE_SECURE_EXPAT: | ||||
|                 fi = get_cover_from_epub(log, abspath) | ||||
|             fi = get_cover_from_epub(log, abspath) | ||||
| 
 | ||||
|         else: | ||||
|             raise Exception("unknown compression %s" % (pk,)) | ||||
| @ -385,7 +369,7 @@ def parse_ffprobe(txt: str) -> tuple[dict[str, tuple[int, Any]], dict[str, list[ | ||||
| 
 | ||||
| def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None: | ||||
|     import zipfile | ||||
|     import xml.etree.ElementTree as ElTree | ||||
|     from .dxml import parse_xml | ||||
|     try: | ||||
|         from urlparse import urljoin  # Python2 | ||||
|     except ImportError: | ||||
| @ -394,21 +378,21 @@ def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None: | ||||
|     with zipfile.ZipFile(abspath, "r") as z: | ||||
|         # First open the container file to find the package document (.opf file) | ||||
|         try: | ||||
|             container_root = ElTree.parse(z.open("META-INF/container.xml")) | ||||
|             container_root = parse_xml(z.read("META-INF/container.xml").decode()) | ||||
|         except KeyError: | ||||
|             log(f"epub: no container file found in {abspath}") | ||||
|             return None | ||||
| 
 | ||||
|         # https://www.w3.org/TR/epub-33/#sec-container.xml-rootfile-elem | ||||
|         container_namesapce = {"": "urn:oasis:names:tc:opendocument:xmlns:container"} | ||||
|         container_namespace = {"": "urn:oasis:names:tc:opendocument:xmlns:container"} | ||||
|         # One file could contain multiple package documents, default to the first one | ||||
|         rootfile_path = container_root\ | ||||
|             .find("./rootfiles/rootfile", container_namesapce)\ | ||||
|             .find("./rootfiles/rootfile", container_namespace)\ | ||||
|             .get("full-path") | ||||
| 
 | ||||
|         # Then open the first package document to find the path of the cover image | ||||
|         try: | ||||
|             package_root = ElTree.parse(z.open(rootfile_path)) | ||||
|             package_root = parse_xml(z.read(rootfile_path).decode()) | ||||
|         except KeyError: | ||||
|             log(f"epub: no package document found in {abspath}") | ||||
|             return None | ||||
| @ -489,9 +473,6 @@ class MTag(object): | ||||
|             self.log(msg.format(or_ffprobe, " " * 37, pyname), c=1) | ||||
|             return | ||||
| 
 | ||||
|         if not HAVE_SECURE_EXPAT: | ||||
|             self.log("expat version is missing critical security fixes; epub thumbnails will not be available", c=3) | ||||
| 
 | ||||
|         # https://picard-docs.musicbrainz.org/downloads/MusicBrainz_Picard_Tag_Map.html | ||||
|         tagmap = { | ||||
|             "album": ["album", "talb", "\u00a9alb", "original-album", "toal"], | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 AppleTheGolden
						AppleTheGolden