fix coordinates overflow of textlines, close #552 (baba06ae) · Commits · git-mirror / BallonsTranslator

modules/textdetector/ctd/inference.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -343,7 +343,7 @@ class TextDetector:
		if lines.size == 0:
		lines = []
		else:
		- lines = lines.astype(np.int32)
		+ lines = lines.astype(np.int64)
		blk_list = group_output(blks, lines, im_w, im_h, mask)
		mask_refined = refine_mask(img, mask, blk_list, refine_mode=refine_mode)
		if keep_undetected_mask:

modules/textdetector/db_utils.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -139,7 +139,7 @@ class SegDetectorRepresenter():
		height, width = bitmap.shape
		contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
		num_contours = min(len(contours), self.max_candidates)
		- boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
		+ boxes = np.zeros((num_contours, 4, 2), dtype=np.int64)
		scores = np.zeros((num_contours,), dtype=np.float32)

		for index in range(num_contours):
		@@ -165,7 +165,7 @@ class SegDetectorRepresenter():

		box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
		box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
		- boxes[index, :, :] = box.astype(np.int16)
		+ boxes[index, :, :] = box.astype(np.int64)
		scores[index] = score
		return boxes, scores

utils/textblock.py

+15 −3

Original line number	Diff line number	Diff line
		@@ -236,9 +236,21 @@ class TextBlock:
		return blk_dict

		def get_transformed_region(self, img: np.ndarray, idx: int, textheight: int, maxwidth: int = None) -> np.ndarray :

		+ line = np.round(np.array(self.lines[idx])).astype(np.int64)
		+ x1, y1, x2, y2 = line[:, 0].min(), line[:, 1].min(), line[:, 0].max(), line[:, 1].max()
		+ im_h, im_w = img.shape[:2]
		+ x1 = np.clip(x1, 0, im_w)
		+ y1 = np.clip(y1, 0, im_h)
		+ x2 = np.clip(x2, 0, im_w)
		+ y2 = np.clip(y2, 0, im_h)
		+ img_croped = img[y1: y2, x1: x2]

		direction = 'v' if self.src_is_vertical else 'h'

		- src_pts = np.array(self.lines[idx], dtype=np.float64)
		+ src_pts = line.copy()
		+ src_pts[:, 0] -= x1
		+ src_pts[:, 1] -= y1
		middle_pnt = (src_pts[[1, 2, 3, 0]] + src_pts) / 2
		vec_v = middle_pnt[2] - middle_pnt[0] # vertical vectors of textlines
		vec_h = middle_pnt[1] - middle_pnt[3] # horizontal vectors of textlines
		@@ -264,7 +276,7 @@ class TextBlock:
		if M is None:
		print('invalid textpolygon to target img')
		return np.zeros((textheight, textheight, 3), dtype=np.uint8)
		- region = cv2.warpPerspective(img, M, (w, h))
		+ region = cv2.warpPerspective(img_croped, M, (w, h))
		elif direction == 'v' :
		w = int(textheight)
		h = int(round(textheight * ratio))
		@@ -273,7 +285,7 @@ class TextBlock:
		if M is None:
		print('invalid textpolygon to target img')
		return np.zeros((textheight, textheight, 3), dtype=np.uint8)
		- region = cv2.warpPerspective(img, M, (w, h))
		+ region = cv2.warpPerspective(img_croped, M, (w, h))
		region = cv2.rotate(region, cv2.ROTATE_90_COUNTERCLOCKWISE)

		if maxwidth is not None: