diff --git a/html_parser.py b/html_parser.py index e3df8c321723ba6d84c1cf6c64afb4e9df05de00..c1376d590910a0c384e4be7f1c361d12a3754b3d 100644 --- a/html_parser.py +++ b/html_parser.py @@ -89,24 +89,29 @@ class Parser: def parse_list(self, list_content, indent=1): self._data._body.append(Paragraph(indent)) + line_returns = [] for content in list_content.contents: - self.parse_list_item(content, indent) + self.parse_list_item(content, indent, line_returns) + print(line_returns) + for ret_index in line_returns[0:-1]: + self._data._body[-1]._content[ret_index]._new_line = True - def parse_list_item(self, content, indent): + + def parse_list_item(self, content, indent, line_returns): + first_only = True for c in content: # remove li tag + print(c) if hasattr(c, "name") and c.name == "ul": last_content = self._data._body[-1]._content[-1] if isinstance(last_content, TextContent): - last_content._new_line = True + last_content._new_line = True # to emulate list in list self.parse_list(c, indent+1) self._data._body.append(Paragraph(indent)) else: self.parse_paragraph(c) if content.name == "li": - if self._data._body[-1]._content: - last_content = self._data._body[-1]._content[-1] - if isinstance(last_content, TextContent): - last_content._new_line = True + line_returns.append(len(self._data._body[-1]._content)) + def parse_image(self, image_content): self._data._body.append(self.return_parsed_image(image_content))