Removing the is_py3 method and the polyglot wrappers that duplicate the standard urllib.

2026-04-11 07:33:35 +02:00 · 2020-04-19 21:22:24 +02:00
parent b66cbd2c1e
commit ef7e2b10be
35 changed files with 267 additions and 254 deletions
--- a/ebook_converter/ebooks/mobi/reader/mobi8.py
+++ b/ebook_converter/ebooks/mobi/reader/mobi8.py
@@ -1,7 +1,10 @@
-import struct, re, os
-from collections import namedtuple
-from itertools import repeat
-from uuid import uuid4
+import collections
+import itertools
+import os
+import re
+import struct
+import urllib.parse
+import uuid

 from lxml import etree

@@ -16,21 +19,20 @@ from ebook_converter.ebooks.mobi.utils import read_font_record
 from ebook_converter.ebooks.oeb.parse_utils import parse_html
 from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text
 from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode
-from ebook_converter.polyglot.urllib import urldefrag


 __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-Part = namedtuple('Part',
+Part = collections.namedtuple('Part',
    'num type filename start end aid')

-Elem = namedtuple('Elem',
+Elem = collections.namedtuple('Elem',
    'insert_pos toc_text file_number sequence_number start_pos '
    'length')

-FlowInfo = namedtuple('FlowInfo',
+FlowInfo = collections.namedtuple('FlowInfo',
        'type format dir fname')

 # locate beginning and ending positions of tag with specific aid attribute
@@ -81,7 +83,7 @@ class Mobi8Reader(object):

    def __call__(self):
        self.mobi6_reader.check_for_drm()
-        self.aid_anchor_suffix = uuid4().hex.encode('utf-8')
+        self.aid_anchor_suffix = uuid.uuid4().hex.encode('utf-8')
        bh = self.mobi6_reader.book_header
        if self.mobi6_reader.kf8_type == 'joint':
            offset = self.mobi6_reader.kf8_boundary + 2
@@ -127,7 +129,7 @@ class Mobi8Reader(object):
        if self.header.skelidx != NULL_INDEX:
            table = read_index(self.kf8_sections, self.header.skelidx,
                    self.header.codec)[0]
-            File = namedtuple('File',
+            File = collections.namedtuple('File',
                'file_number name divtbl_count start_position length')

            for i, text in enumerate(table):
@@ -149,7 +151,7 @@ class Mobi8Reader(object):
        if self.header.othidx != NULL_INDEX:
            table, cncx = read_index(self.kf8_sections, self.header.othidx,
                    self.header.codec)
-            Item = namedtuple('Item',
+            Item = collections.namedtuple('Item',
                'type title pos_fid')

            for i, ref_type in enumerate(table):
@@ -222,7 +224,7 @@ class Mobi8Reader(object):
            self.parts.append(skeleton)
            if divcnt < 1:
                # Empty file
-                aidtext = unicode_type(uuid4())
+                aidtext = unicode_type(uuid.uuid4())
                filename = aidtext + '.html'
            self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
                baseptr, aidtext))
@@ -293,7 +295,7 @@ class Mobi8Reader(object):
        for part in self.partinfo:
            if pos >= part.start and pos < part.end:
                return part
-        return Part(*repeat(None, len(Part._fields)))
+        return Part(*itertools.repeat(None, len(Part._fields)))

    def get_id_tag_by_pos_fid(self, posfid, offset):
        # first convert kindle:pos:fid and offset info to position in file
@@ -475,7 +477,7 @@ class Mobi8Reader(object):
            for ref in guide:
                if ref.type == 'toc':
                    href = ref.href()
-                    href, frag = urldefrag(href)
+                    href, frag = urllib.parse.urldefrag(href)
                    if os.path.exists(href.replace('/', os.sep)):
                        try:
                            toc = self.read_inline_toc(href, frag)
@@ -554,7 +556,7 @@ class Mobi8Reader(object):
            if reached and elem.tag == XHTML('a') and elem.get('href',
                    False):
                href = elem.get('href')
-                href, frag = urldefrag(href)
+                href, frag = urllib.parse.urldefrag(href)
                href = base_href + '/' + href
                text = xml2text(elem).strip()
                if (text, href, frag) in seen:
--- a/ebook_converter/ebooks/mobi/writer2/serializer.py
+++ b/ebook_converter/ebooks/mobi/writer2/serializer.py
@@ -1,7 +1,8 @@
+import collections
+import io
 import re
 import unicodedata
-from collections import defaultdict
-from io import BytesIO
+import urllib.parse

 from ebook_converter.ebooks.mobi.mobiml import MBP_NS
 from ebook_converter.ebooks.mobi.utils import is_guide_ref_start
@@ -9,7 +10,6 @@ from ebook_converter.ebooks.oeb.base import (
    OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
 )
 from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
-from ebook_converter.polyglot.urllib import urldefrag


 __license__ = 'GPL v3'
@@ -17,12 +17,12 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'


-class Buf(BytesIO):
+class Buf(io.BytesIO):

    def write(self, x):
        if isinstance(x, unicode_type):
            x = x.encode('utf-8')
-        BytesIO.write(self, x)
+        io.BytesIO.write(self, x)


 class Serializer(object):
@@ -63,7 +63,7 @@ class Serializer(object):
        # Mapping of hrefs (urlnormalized) to a list of offsets into the buffer
        # where filepos="..." elements are written corresponding to links that
        # point to the href. This is used at the end to fill in the correct values.
-        self.href_offsets = defaultdict(list)
+        self.href_offsets = collections.defaultdict(list)

        # List of offsets in the buffer of non linear items in the spine. These
        # become uncrossable breaks in the MOBI
@@ -81,7 +81,7 @@ class Serializer(object):
            item.is_article_start = item.is_article_end = False

        def spine_item(tocitem):
-            href = urldefrag(tocitem.href)[0]
+            href = urllib.parse.urldefrag(tocitem.href)[0]
            for item in self.oeb.spine:
                if item.href == href:
                    return item
@@ -157,7 +157,7 @@ class Serializer(object):
        hrefs = self.oeb.manifest.hrefs
        buf.write(b'<guide>')
        for ref in self.oeb.guide.values():
-            path = urldefrag(ref.href)[0]
+            path = urllib.parse.urldefrag(ref.href)[0]
            if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
                continue

@@ -188,7 +188,7 @@ class Serializer(object):
        '''
        hrefs = self.oeb.manifest.hrefs
        try:
-            path, frag = urldefrag(urlnormalize(href))
+            path, frag = urllib.parse.urldefrag(urlnormalize(href))
        except ValueError:
            # Unparseable URL
            return False
@@ -382,7 +382,7 @@ class Serializer(object):
            if href not in id_offsets:
                self.logger.warn('Hyperlink target %r not found' % href)
                # Link to the top of the document, better than just ignoring
-                href, _ = urldefrag(href)
+                href, _ = urllib.parse.urldefrag(href)
            if href in self.id_offsets:
                ioff = self.id_offsets[href]
                if is_start: