fix textlines postprocess (82dcdf13) · Commits · git-mirror / BallonsTranslator

ballontranslator/dl/textdetector/ctd/inference.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -43,7 +43,7 @@ def model2annotations(model_path, img_dir_list, save_dir, save_json=False):
		for blk in blk_list:
		polys += blk.lines
		blk_xyxy.append(blk.xyxy)
		blk_dict_list.append(blk.to_dict(extra_info=True))
		blk_dict_list.append(blk.to_dict())
		blk_xyxy = xyxy2yolo(blk_xyxy, im_w, im_h)
		if blk_xyxy is not None:
		cls_list = [1] * len(blk_xyxy)

ballontranslator/dl/textdetector/textblock.py

+20 −15

Original line number	Diff line number	Diff line
		@@ -181,12 +181,11 @@ class TextBlock(object):
		def __getitem__(self, idx):
		return self.lines[idx]

		def to_dict(self, extra_info=False):
		def to_dict(self):
		blk_dict = copy.deepcopy(vars(self))
		return blk_dict

		def get_transformed_region(self, img, idx, textheight, maxwidth=None) -> np.ndarray :
		im_h, im_w = img.shape[:2]
		def get_transformed_region(self, img: np.ndarray, idx: int, textheight: int, maxwidth: int = None) -> np.ndarray :
		direction = 'v' if self.vertical else 'h'
		src_pts = np.array(self.lines[idx], dtype=np.float64)

		@@ -322,7 +321,7 @@ def sort_textblk_list(blk_list: List[TextBlock], im_w: int, im_h: int) -> List[T
		blk_list.sort(key=lambda blk: blk.sort_weight)
		return blk_list

		def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool, sort: bool = False) -> None:
		def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) -> None:
		lines = blk.lines_array()
		middle_pnts = (lines[:, [1, 2, 3, 0]] + lines) / 2
		vec_v = middle_pnts[:, 2] - middle_pnts[:, 0] # vertical vectors of textlines
		@@ -332,7 +331,10 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool
		v = np.sum(vec_v, axis=0)
		h = np.sum(vec_h, axis=0)
		norm_v, norm_h = np.linalg.norm(v), np.linalg.norm(h)
		vertical = eval_orientation and norm_v > norm_h
		if blk.language == 'ja':
		vertical = norm_v > norm_h
		else:
		vertical = norm_v > norm_h * 2
		# calculate distance between textlines and origin
		if vertical:
		primary_vec, primary_norm = v, norm_v
		@@ -355,7 +357,6 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool
		if abs(blk.angle) < 3:
		blk.angle = 0
		blk.font_size = font_size
		if eval_orientation:
		blk.vertical = vertical
		blk.vec = primary_vec
		blk.norm = primary_norm
		@@ -440,8 +441,10 @@ def split_textblk(blk: TextBlock):
		return textblock_splitted, sub_blk_list

		def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[TextBlock]:
		blk_list, scattered_lines = [], {'ver': [], 'hor': []}
		blk_list: List[TextBlock] = []
		scattered_lines = {'ver': [], 'hor': []}
		for bbox, cls, conf in zip(*blks):
		# cls could give wrong result
		blk_list.append(TextBlock(bbox, language=LANG_LIST[cls]))

		# step1: filter & assign lines to textblocks
		@@ -473,7 +476,7 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[

		# step2: filter textblocks, sort & split textlines
		final_blk_list = []
		for ii, blk in enumerate(blk_list):
		for blk in blk_list:
		# filter textblocks
		if len(blk.lines) == 0:
		bx1, by1, bx2, by2 = blk.xyxy
		@@ -483,11 +486,15 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[
		continue
		xywh = np.array([[bx1, by1, bx2-bx1, by2-by1]])
		blk.lines = xywh2xyxypoly(xywh).reshape(-1, 4, 2).tolist()
		eval_orientation = blk.language != 'eng'
		examine_textblk(blk, im_w, im_h, eval_orientation, sort=True)
		examine_textblk(blk, im_w, im_h, sort=True)

		# split manga text if there is a distance gap
		textblock_splitted = blk.language == 'ja' and len(blk.lines) > 1
		textblock_splitted = False
		if len(blk.lines) > 1:
		if blk.language == 'ja':
		textblock_splitted = True
		elif blk.vertical:
		textblock_splitted = True
		if textblock_splitted:
		textblock_splitted, sub_blk_list = split_textblk(blk)
		else:
		@@ -505,13 +512,11 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[
		final_blk_list = sort_textblk_list(final_blk_list, im_w, im_h)

		for blk in final_blk_list:
		if blk.language == 'eng':
		blk.vertical = False
		if blk.language == 'eng' and not blk.vertical:
		num_lines = len(blk.lines)
		if num_lines == 0:
		continue
		# blk.line_spacing = blk.bounding_rect()[3] / num_lines / blk.font_size
		resize_ratio = 1.1
		expand_size = max(int(blk.font_size * 0.1), 2)
		rad = np.deg2rad(blk.angle)
		shifted_vec = np.array([[[-1, -1],[1, -1],[1, 1],[-1, 1]]])