Commit 19cd3012 authored by dmMaze's avatar dmMaze
Browse files

fix doc import

parent 4655a99a
Loading
Loading
Loading
Loading
+5 −6
Original line number Diff line number Diff line
@@ -411,21 +411,21 @@ def merge_textlines(blk_list: List[TextBlock]) -> List[TextBlock]:
def split_textblk(blk: TextBlock):
    font_size, distance, lines = blk.font_size, blk.distance, blk.lines
    l0 = np.array(blk.lines[0])
    lines.sort(key=lambda line: np.linalg.norm(np.array(line[0] - l0[0])))
    lines.sort(key=lambda line: np.linalg.norm(np.array(line[0]) - l0[0]))
    distance_tol = font_size * 2
    current_blk = copy.deepcopy(blk)
    current_blk.lines = [lines[0]]
    current_blk.lines = [l0]
    sub_blk_list = [current_blk]
    textblock_splitted = False
    for jj, line in enumerate(lines[1:]):
        l1, l2 = Polygon(lines[jj]), Polygon(line)
        split = False
        if not l1.intersects(l2):
            line_disance = distance[jj+1] - distance[jj]
            line_disance = abs(distance[jj+1] - distance[jj])
            if line_disance > distance_tol:
                split = True
            else:
                if blk.vertical and abs(blk.angle) < 10 and len(current_blk.lines) > 1:
            elif blk.vertical and abs(blk.angle) < 15:
                if len(current_blk.lines) > 1 or line_disance > font_size:
                    split = abs(lines[jj][0][1] - line[0][1]) > font_size
        if split:
            current_blk = copy.deepcopy(current_blk)
@@ -483,7 +483,6 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[
                    continue
            xywh = np.array([[bx1, by1, bx2-bx1, by2-by1]])
            blk.lines = xywh2xyxypoly(xywh).reshape(-1, 4, 2).tolist()
        lines = blk.lines_array()
        eval_orientation = blk.language != 'eng'
        examine_textblk(blk, im_w, im_h, eval_orientation, sort=True)
        
+1 −7
Original line number Diff line number Diff line
@@ -249,8 +249,8 @@ class Canvas(QGraphicsScene):
        if scale_changed:
            self.adjustScrollBar(self.gv.horizontalScrollBar(), factor)
            self.adjustScrollBar(self.gv.verticalScrollBar(), factor)
        self.setSceneRect(0, 0, self.imgLayer.sceneBoundingRect().width(), self.imgLayer.sceneBoundingRect().height())
            self.scalefactor_changed.emit()
        self.setSceneRect(0, 0, self.imgLayer.sceneBoundingRect().width(), self.imgLayer.sceneBoundingRect().height())

    def onViewResized(self):
        gv_w, gv_h = self.gv.geometry().width(), self.gv.geometry().height()
@@ -280,12 +280,6 @@ class Canvas(QGraphicsScene):
    def keyPressEvent(self, event: QKeyEvent) -> None:
        """Record an Alt key press, then forward the event to the base class.

        While ``self.editing_textblkitem`` is set, the event is forwarded
        untouched and the key is not inspected. Otherwise ``self.alt_pressed``
        is set to ``True`` when the pressed key is Alt.
        """
        not_editing = self.editing_textblkitem is None
        # Short-circuit keeps event.key() from being queried during editing.
        if not_editing and event.key() == Qt.Key.Key_Alt:
            self.alt_pressed = True
        return super().keyPressEvent(event)
+24 −10
Original line number Diff line number Diff line
@@ -338,10 +338,7 @@ class ProjImgTrans:
        body_xml_str = doc._body._element.xml

        pages = {}
        pagename2idx = {}
        idx2pagename = {}
        bub_index = 0
        page_counter = 0
        for tbl in re.findall(r'<w:tbl>(.*?)</w:tbl>', body_xml_str, re.DOTALL):
            for tr in re.findall(r'<w:tr(.*?)>(.*?)</w:tr>', tbl, re.DOTALL):
                if re.findall(r'<pic:cNvPr id=\"0\" name=\"(.*?)\"/>', tr[1]):
@@ -361,20 +358,37 @@ class ProjImgTrans:
                    imgkey = meta_dict.pop("imgkey")
                    if not imgkey in pages:
                        pages[imgkey] = []
                        pagename2idx[imgkey] = page_counter
                        idx2pagename[page_counter] = imgkey
                        page_counter += 1
                    pages[imgkey].append(TextBlock(**meta_dict))
                    
                    if fin_page_signal is not None:
                        fin_page_signal.emit()

        self.merge_from_proj_dict(pages)
        if delete_tmp_folder:
            shutil.rmtree(tmp_bubble_folder)

    def merge_from_proj_dict(self, tgt_dict: Dict) -> Dict:
        """Merge ``tgt_dict`` into ``self.pages`` and rebuild the page indices.

        A page present in both mappings takes its value from ``tgt_dict``;
        a page present only in ``self.pages`` is kept as is. The merged pages
        are stored in sorted key order, and ``self._pagename2idx`` /
        ``self._idx2pagename`` are rebuilt to map each page name to its
        position in that order and back.

        Args:
            tgt_dict: Mapping of page name to page content to merge in.

        Returns:
            ``self.pages`` after the merge. The existing dict object is
            updated in place, so references held elsewhere stay valid.
        """
        if self.pages is None:
            self.pages = {}

        # Later entries win, so tgt_dict overrides pages that already exist.
        combined = {**self.pages, **tgt_dict}
        page_names = sorted(combined)
        merged_pages = {name: combined[name] for name in page_names}

        # Mutate in place (clear + update) instead of rebinding self.pages.
        self.pages.clear()
        self.pages.update(merged_pages)
        self._pagename2idx = {name: idx for idx, name in enumerate(page_names)}
        self._idx2pagename = dict(enumerate(page_names))
        return self.pages


def gen_ballon_cuts(cuts_dir: str, imgpath: str, blk_list: List[TextBlock], resize=True) -> Tuple[List[str], List[int]]: