From 87dc396bb6bc877c52e5b99083980e4d23aa8848 Mon Sep 17 00:00:00 2001 From: Alexis Filipozzi <alexis.filipozzi@gmail.com> Date: Thu, 16 Jun 2016 20:24:49 +0200 Subject: [PATCH] small correction in ul translation --- data.py | 2 ++ html_parser.py | 19 ++++++++++--------- main.py | 3 +-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/data.py b/data.py index f600fd7..4055f40 100644 --- a/data.py +++ b/data.py @@ -89,6 +89,8 @@ class TextContent(ParagraphContent): def get_string(self, clean = True): text = self._text + if not text: + text = "" if clean: text = text.strip() return text diff --git a/html_parser.py b/html_parser.py index 91a1e69..22f2156 100644 --- a/html_parser.py +++ b/html_parser.py @@ -2,11 +2,11 @@ from bs4 import BeautifulSoup from data import * def iterable(a): - try: - (x for x in a) - return True - except TypeError: - return False + try: + (x for x in a) + return True + except TypeError: + return False def iterable_and_not_string(a): return iterable(a) and not isinstance(a, basestring) @@ -88,10 +88,11 @@ class Parser: self._data._body.append(Paragraph(indent)) else: self.parse_paragraph(c) - if self._data._body[-1]._content: - last_content = self._data._body[-1]._content[-1] - if isinstance(last_content, TextContent): - last_content._new_line = True + if content.name == "li": + if self._data._body[-1]._content: + last_content = self._data._body[-1]._content[-1] + if isinstance(last_content, TextContent): + last_content._new_line = True def parse_image(self, image_content): self._data._body.append(self.return_parsed_image(image_content)) diff --git a/main.py b/main.py index 5e65628..bd203ad 100644 --- a/main.py +++ b/main.py @@ -32,12 +32,11 @@ def parse_and_format(dir_path, filename, dest): for sub_file in os.listdir(path_to_file): parse_and_format(path_to_file, sub_file, path_to_dst) elif os.path.isfile(path_to_file): - parse_format_and_write_file(path_to_file, path_to_dst) + parse_format_and_write_file(path_to_file, os.path.join(dest, filename.replace(".html", ".protoxml"))) else: print "Unhandled file type (probably symlink) with file " + str(path_to_file) def parse_format_and_write_file(filename, dest): - print "parse file" has_been_parsed = False parser = None formatter = None -- GitLab