Blob Blame History Raw
From 504d7c417593f7402198886f68cd6b4363844035 Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Thu, 18 Jul 2019 12:26:48 -0400
Subject: [PATCH 07/71] use raw strings where possible to avoid escaping issues

---
 src/calibre/ebooks/oeb/transforms/split.py |  2 +-
 src/calibre/ebooks/pml/pmlml.py            | 24 +++++++++++-----------
 src/calibre/ebooks/readability/cleaners.py |  2 +-
 src/calibre/ebooks/rtf/rtfml.py            | 12 +++++------
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index d0e1a334ec..a42bae67a5 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -294,7 +294,7 @@ class FlowSplitter(object):
         body = self.get_body(root)
         if body is None:
             return False
-        txt = re.sub(u'\\s+|\\xa0', '',
+        txt = re.sub(ur'\s+|\xa0', '',
                 etree.tostring(body, method='text', encoding='unicode'))
         if len(txt) > 1:
             return False
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 772117321c..e99ec66226 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -155,7 +155,7 @@ class PMLMLizer(object):
 
     def get_anchor(self, page, aid):
         aid = self.get_anchor_id(page.href, aid)
-        return u'\\Q="%s"' % aid
+        return ur'\Q="%s"' % aid
 
     def remove_newlines(self, text):
         text = text.replace('\r\n', ' ')
@@ -186,10 +186,10 @@ class PMLMLizer(object):
         anchors = set(re.findall(r'(?<=\\Q=").+?(?=")', text))
         links = set(re.findall(r'(?<=\\q="#).+?(?=")', text))
         for unused in anchors.difference(links):
-            text = text.replace('\\Q="%s"' % unused, '')
+            text = text.replace(r'\Q="%s"' % unused, '')
 
         # Remove \Cn tags that are within \x and \Xn tags
-        text = re.sub(unicode_type(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), '\\g<t>\\g<a>\\g<b>\\g<t>', text)
+        text = re.sub(unicode_type(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), r'\g<t>\g<a>\g<b>\g<t>', text)
 
         # Replace bad characters.
         text = text.replace(u'\xc2', '')
@@ -259,7 +259,7 @@ class PMLMLizer(object):
                             '%s.png' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
                 text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])])
         elif tag == 'hr':
-            w = '\\w'
+            w = r'\w'
             width = elem.get('width')
             if width:
                 if not width.endswith('%'):
@@ -286,17 +286,17 @@ class PMLMLizer(object):
                     toc_title, toc_depth = self.toc[toc_page].get(toc_x, (None, 0))
                     if toc_title:
                         toc_depth = max(min(toc_depth, 4), 0)
-                        text.append('\\C%s="%s"' % (toc_depth, toc_title))
+                        text.append(r'\C%s="%s"' % (toc_depth, toc_title))
 
         # Process style information that needs holds a single tag.
         # Commented out because every page in an OEB book starts with this style.
         if style['page-break-before'] == 'always':
-            text.append('\\p')
+            text.append(r'\p')
 
         # Process basic PML tags.
         pml_tag = TAG_MAP.get(tag, None)
         if pml_tag and pml_tag not in tag_stack+tags:
-            text.append('\\%s' % pml_tag)
+            text.append(r'\%s' % pml_tag)
             tags.append(pml_tag)
 
         # Special processing of tags that require an argument.
@@ -311,7 +311,7 @@ class PMLMLizer(object):
                     if href not in self.link_hrefs.keys():
                         self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
                     href = '#%s' % self.link_hrefs[href]
-                    text.append('\\q="%s"' % href)
+                    text.append(r'\q="%s"' % href)
                     tags.append('q')
 
         # Anchor ids
@@ -325,14 +325,14 @@ class PMLMLizer(object):
         for s in STYLES:
             style_tag = s[1].get(style[s[0]], None)
             if style_tag and style_tag not in tag_stack+tags:
-                text.append('\\%s' % style_tag)
+                text.append(r'\%s' % style_tag)
                 tags.append(style_tag)
 
         # margin left
         try:
             mms = int(float(style['margin-left']) * 100 / style.height)
             if mms:
-                text.append('\\T="%s%%"' % mms)
+                text.append(r'\T="%s%%"' % mms)
         except:
             pass
 
@@ -360,7 +360,7 @@ class PMLMLizer(object):
         #    text.append('\n\n')
 
         if style['page-break-after'] == 'always':
-            text.append('\\p')
+            text.append(r'\p')
 
         # Process text after this tag but not within another.
         if hasattr(elem, 'tail') and elem.tail:
@@ -382,5 +382,5 @@ class PMLMLizer(object):
                 if tag in ('c', 'r'):
                     text.append('\n\\%s' % tag)
                 else:
-                    text.append('\\%s' % tag)
+                    text.append(r'\%s' % tag)
         return text
diff --git a/src/calibre/ebooks/readability/cleaners.py b/src/calibre/ebooks/readability/cleaners.py
index 057fcf17b3..d30216c4d8 100644
--- a/src/calibre/ebooks/readability/cleaners.py
+++ b/src/calibre/ebooks/readability/cleaners.py
@@ -17,7 +17,7 @@ htmlstrip = re.compile("<"  # open
 
 def clean_attributes(html):
     while htmlstrip.search(html):
-        html = htmlstrip.sub('<\\1\\2>', html)
+        html = htmlstrip.sub(r'<\1\2>', html)
     return html
 
 
diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
index d4b339c53c..7f6cc91c50 100644
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -83,7 +83,7 @@ def txt2rtf(text):
     for x in text:
         val = ord(x)
         if val == 160:
-            buf.write(u'\\~')
+            buf.write(ur'\~')
         elif val <= 127:
             buf.write(unicode_type(x))
         else:
@@ -115,7 +115,7 @@ class RTFMLizer(object):
                         self.opts, self.opts.output_profile)
                 self.currently_dumping_item = item
                 output += self.dump_text(item.data.find(XHTML('body')), stylizer)
-                output += '{\\page }'
+                output += r'{\page }'
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to RTF markup...' % item.href)
             # Removing comments is needed as comments with -- inside them can
@@ -127,7 +127,7 @@ class RTFMLizer(object):
             stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             self.currently_dumping_item = item
             output += self.dump_text(content.find(XHTML('body')), stylizer)
-            output += '{\\page }'
+            output += r'{\page }'
         output += self.footer()
         output = self.insert_images(output)
         output = self.clean_text(output)
@@ -259,7 +259,7 @@ class RTFMLizer(object):
                 block_start = ''
                 block_end = ''
                 if 'block' not in tag_stack:
-                    block_start = '{\\par\\pard\\hyphpar '
+                    block_start = r'{\par\pard\hyphpar '
                     block_end = '}'
                 text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end)
 
@@ -292,7 +292,7 @@ class RTFMLizer(object):
             end_tag =  tag_stack.pop()
             if end_tag != 'block':
                 if tag in BLOCK_TAGS:
-                    text += u'\\par\\pard\\plain\\hyphpar}'
+                    text += ur'\par\pard\plain\hyphpar}'
                 else:
                     text += u'}'
 
@@ -300,6 +300,6 @@ class RTFMLizer(object):
             if 'block' in tag_stack:
                 text += '%s' % txt2rtf(elem.tail)
             else:
-                text += '{\\par\\pard\\hyphpar %s}' % txt2rtf(elem.tail)
+                text += r'{\par\pard\hyphpar %s}' % txt2rtf(elem.tail)
 
         return text