Skip to content

Differential Binarization model #2095

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 40 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ed97271
ImageText detector preprocessor for Differential Binarization model
mehtamansi29 Feb 12, 2025
d97f362
db_utils functions and testfile
mehtamansi29 Mar 11, 2025
de3aaae
Diffbin utils function and test file
mehtamansi29 Mar 11, 2025
9a3cf2a
diffbin utils function and testfile
mehtamansi29 Apr 8, 2025
93ad1ba
diffbin preprocessing function
mehtamansi29 May 12, 2025
7268535
diffbin postprocessing function
mehtamansi29 May 14, 2025
f1c3734
diffbin postprocessing function_1
mehtamansi29 May 14, 2025
d3c74c9
diffbin postprocessing function_2
mehtamansi29 May 14, 2025
aafef9e
diffbin postprocessing function_3
mehtamansi29 May 14, 2025
352a089
Merge branch 'keras-team:master' into diffbin
mehtamansi29 May 20, 2025
d94a2e6
diffbin preprocessing and db_utils completed
mehtamansi29 May 20, 2025
0028b90
Merge branch 'keras-team:master' into diffbin
mehtamansi29 May 26, 2025
d4724d9
diffbin_backbone model creation and backbone test for diffbin segmen…
mehtamansi29 May 26, 2025
3c75f47
Merge branch 'keras-team:master' into diffbin
mehtamansi29 Jun 2, 2025
d41dc34
modified diffbin _textdetector
mehtamansi29 Jun 2, 2025
4b602c4
Updates image_text_detector preprocessor
mehtamansi29 Jun 3, 2025
ee2dced
Updates image_text_detector preprocessor with ignores argument
mehtamansi29 Jun 3, 2025
736b0c9
Updates image_text_detector preprocessor,db_utils and formatting with…
mehtamansi29 Jun 4, 2025
fcfed6a
Updates image_text_detector_1
mehtamansi29 Jun 4, 2025
98e2fbc
Updates image_text_detector_1
mehtamansi29 Jun 4, 2025
5fcaefc
Updates image_text_detector_3
mehtamansi29 Jun 4, 2025
19c4e79
Updates image_text_detector_3
mehtamansi29 Jun 4, 2025
a5516dc
Updates image_text_detector_4
mehtamansi29 Jun 4, 2025
b46db73
Updates image_text_detector_5
mehtamansi29 Jun 4, 2025
8c42e56
Updates image_text_detector_6
mehtamansi29 Jun 4, 2025
6b528a2
Updates image_text_detector_7
mehtamansi29 Jun 4, 2025
df67b6c
annotation size
mehtamansi29 Jun 5, 2025
34cc866
Merge branch 'keras-team:master' into diffbin
mehtamansi29 Jun 5, 2025
876f1af
fill poly keras changes
mehtamansi29 Jun 5, 2025
9a4a3d6
fill poly keras changes revert
mehtamansi29 Jun 5, 2025
4bbbbb8
diffbin_imagetextdetector import changes
mehtamansi29 Jun 5, 2025
5acaaca
diffbin_imagetextdetector changes
mehtamansi29 Jun 5, 2025
9b7d7c4
diffbin_imagetextdetector and precommit changes
mehtamansi29 Jun 6, 2025
38eab50
diffbin_imagetextdetector and precommit changes
mehtamansi29 Jun 6, 2025
9865bc0
diffbin_textdetector_1
mehtamansi29 Jun 9, 2025
e57d280
diffbin_textdetector_2
mehtamansi29 Jun 9, 2025
1e85236
diffbin_textdetector_3
mehtamansi29 Jun 10, 2025
5007488
diffbin_textdetector_4
mehtamansi29 Jun 10, 2025
55dd899
Merge branch 'keras-team:master' into diffbin
mehtamansi29 Jun 16, 2025
39ae6c3
diffbin_backbon_image_shape
mehtamansi29 Jun 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
db_utils functions and testfile
  • Loading branch information
mehtamansi29 committed Mar 11, 2025
commit d97f362992157a82d08e71129a4fdb3f65b5a13e
222 changes: 222 additions & 0 deletions keras_hub/src/utils/diffbin/db_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
import os
import numpy as np
import keras
import tensorflow as tf


def Polygon(coords):
    """Return the area enclosed by a polygon, using the Shoelace formula.

    Args:
        coords: Array-like of vertices; column 0 is read as x and column 1
            as y. It is converted to a float32 tensor.

    Returns:
        Scalar tensor with the absolute (orientation-independent) area.
    """
    vertices = keras.ops.convert_to_tensor(coords, dtype="float32")
    # Pair every vertex with its successor; the last one wraps to the first.
    successors = keras.ops.roll(vertices, shift=-1, axis=0)

    cross_terms = (
        vertices[:, 0] * successors[:, 1] - successors[:, 0] * vertices[:, 1]
    )
    signed_double_area = keras.ops.sum(cross_terms)
    return 0.5 * keras.ops.abs(signed_double_area)

def shrink_polygan(poly, factor):
    """Scale a polygon about its centroid.

    Args:
        poly: Array-like of vertices, converted to a float32 tensor.
        factor: Multiplier applied to each vertex's offset from the
            centroid; 1 leaves the polygon unchanged and 0 collapses every
            vertex onto the centroid.

    Returns:
        Float32 tensor holding the scaled vertices.
    """
    vertices = keras.ops.convert_to_tensor(poly, dtype="float32")
    # The centroid is taken as the mean of the vertices along axis 0.
    center = keras.ops.mean(vertices, axis=0)
    offsets = vertices - center
    return center + offsets * factor

def binary_search_smallest_width(poly):
    """Bisect the centroid scale factor at which `poly` keeps a usable area.

    The search runs over a scale factor between 0 and 1 (1 = original size,
    0 = collapsed onto the centroid) and halves the interval until it is at
    most 0.01 wide. A scale whose shrunken polygon still has an area above
    0.1 raises the lower bound; any other scale lowers the upper bound.

    Args:
        poly: Sequence of polygon vertices.

    Returns:
        0 when `poly` has fewer than three vertices or when the midpoint of
        the final interval is below 0.1; otherwise that midpoint truncated
        with `int()`.
    """
    if len(poly) < 3:
        return 0

    low, high = 0, 1
    while high - low > 0.01:
        mid = (high + low) / 2
        if Polygon(shrink_polygan(poly, mid)) > 0.1:
            low = mid
        else:
            high = mid

    height = (low + high) / 2
    # NOTE(review): `height` stays below 1 here, so the int() truncation
    # looks like it always yields 0 -- confirm whether a float result (or a
    # search range in pixel units) was intended before relying on this value.
    return int(height) if height >= 0.1 else 0

def project_point_to_line(x, u, v, axis=0):
    """Orthogonally project `x` onto the infinite line through `u` and `v`.

    Args:
        x: Point(s) to project.
        u: A point on the line.
        v: A second point on the line.
        axis: Axis holding the coordinate components; the norm and the dot
            product are reduced over it (with `keepdims=True`).

    Returns:
        Float32 tensor with the projected point(s).
    """
    point = keras.ops.convert_to_tensor(x, dtype="float32")
    origin = keras.ops.convert_to_tensor(u, dtype="float32")
    target = keras.ops.convert_to_tensor(v, dtype="float32")

    direction = target - origin
    length = keras.ops.norm(direction, axis=axis, keepdims=True)
    # The epsilon keeps the division finite when u and v coincide.
    unit = direction / (length + np.finfo(np.float32).eps)

    # Dot product of (x - u) with the unit direction: offset along the line.
    along = keras.ops.sum((point - origin) * unit, axis=axis, keepdims=True)
    return origin + unit * along

def project_point_to_segment(x, u, v, axis=0):
    """Project `x` onto the line segment between `u` and `v`.

    The point is first projected onto the supporting line. When that
    projection does not lie strictly between the endpoints, the endpoint
    nearer to the projection is returned instead.

    Args:
        x: Point(s) to project.
        u: Segment start point(s).
        v: Segment end point(s).
        axis: Axis holding the coordinate components.

    Returns:
        Float32 tensor with the closest point(s) on the segment.
    """
    # Convert the endpoints up front (as `project_point_to_line` does) so the
    # arithmetic below does not depend on how a backend mixes tensors with
    # plain lists or NumPy arrays.
    u = keras.ops.convert_to_tensor(u, dtype="float32")
    v = keras.ops.convert_to_tensor(v, dtype="float32")

    p = project_point_to_line(x, u, v, axis=axis)
    to_u = u - p
    to_v = v - p

    # A non-negative dot product means both endpoints lie on the same side
    # of the projection (or it coincides with one of them).
    outer = keras.ops.greater_equal(
        keras.ops.sum(to_u * to_v, axis=axis, keepdims=True), 0
    )
    near_u = keras.ops.less_equal(
        keras.ops.norm(to_u, axis=axis, keepdims=True),
        keras.ops.norm(to_v, axis=axis, keepdims=True),
    )
    nearest_endpoint = keras.ops.where(near_u, u, v)
    return keras.ops.where(outer, nearest_endpoint, p)

def get_line_height(poly):
    """Return the height of the text line outlined by `poly`.

    Delegates to `binary_search_smallest_width` and returns its result
    unchanged.
    """
    line_height = binary_search_smallest_width(poly)
    return line_height

def line_segment_intersection(x, y, polygon):
    """Tell whether the point (x, y) lies inside `polygon` (ray casting).

    Each polygon edge that straddles the horizontal line through `y` and
    crosses it to the right of `x` counts as one crossing; an odd number of
    crossings means the point is inside.

    Args:
        x: X coordinate of the query point.
        y: Y coordinate of the query point.
        polygon: Sequence of (x, y) vertices; the last vertex is joined back
            to the first.

    Returns:
        True when the point is inside the polygon, False otherwise.
    """
    vertex_count = len(polygon)
    crossings = 0
    for idx in range(vertex_count):
        ax, ay = polygon[idx]
        bx, by = polygon[(idx + 1) % vertex_count]
        # Edges entirely above or entirely below the ray cannot cross it;
        # this also guarantees ay != by for the division below.
        if (ay > y) == (by > y):
            continue
        if x < ax + (y - ay) * (bx - ax) / (by - ay):
            crossings += 1
    return crossings % 2 == 1

def fill_poly(vertices, image_shape):
    """Rasterise a polygon into a float32 mask via point-in-polygon tests.

    Args:
        vertices: Polygon vertices as (x, y) pairs.
        image_shape: `(height, width)` of the mask to produce.

    Returns:
        Float32 tensor of shape `(height, width)` holding 1.0 where the
        pixel coordinate tests inside the polygon and 0.0 elsewhere.
    """
    height, width = image_shape
    grid_x, grid_y = keras.ops.meshgrid(
        keras.ops.arange(width), keras.ops.arange(height)
    )
    flat_x = keras.ops.reshape(keras.ops.cast(grid_x, "float32"), (-1,))
    flat_y = keras.ops.reshape(keras.ops.cast(grid_y, "float32"), (-1,))

    # Every pixel coordinate is tested one by one with the ray-casting helper.
    hits = []
    for pixel in range(flat_x.shape[0]):
        hits.append(
            line_segment_intersection(flat_x[pixel], flat_y[pixel], vertices)
        )

    inside = keras.ops.reshape(
        keras.ops.convert_to_tensor(hits, dtype="bool"), (height, width)
    )
    blank = keras.ops.zeros((height, width), dtype="float32")
    return keras.ops.where(inside, keras.ops.ones_like(blank), blank)

def get_mask(w, h, polys, ignores):
    """Build the float32 mask for an image from its annotated polygons.

    The mask starts as all ones. A polygon flagged in `ignores` has its
    filled area set to 0; any other polygon has its filled area merged into
    the mask with an element-wise maximum.

    Args:
        w: Mask width.
        h: Mask height.
        polys: Iterable of polygons, each a sequence of (x, y) vertices.
            Vertices are cast to int32 before rasterisation.
        ignores: Iterable of truthy/falsy flags, paired with `polys`.

    Returns:
        Float32 tensor of shape `(h, w)`.
    """
    mask = keras.ops.ones((h, w), dtype="float32")

    for poly, ignore in zip(polys, ignores):
        poly = np.array(poly, np.int32)

        # Polygons with fewer than three vertices are reported and skipped.
        if poly.shape[0] < 3:
            print("Skipping invalid polygon:", poly)
            continue

        poly_mask = fill_poly(poly, (h, w))

        if ignore:
            mask = keras.ops.where(
                poly_mask == 1.0, keras.ops.zeros_like(mask), mask
            )
        else:
            mask = keras.ops.maximum(mask, poly_mask)
    return mask

def get_region_coordinate(w, h, polys, heights, shrink):
    """Collect the pixel coordinates of each shrunken text-line region.

    Every polygon with a positive height is scaled about its centroid by
    `1 - height * shrink`, rasterised, and written into a label map as
    `line_id + 1` (later polygons overwrite earlier ones where they overlap).

    Args:
        w: Image width.
        h: Image height.
        polys: Iterable of polygons, each a sequence of (x, y) vertices.
        heights: Iterable of line heights, paired with `polys`.
        shrink: Shrink ratio multiplied by each height.

    Returns:
        A list with one tensor of (x, y) coordinates per label present in
        the label map, in the order `tf.unique` reports the labels. When no
        pixel is labelled, a list holding a single empty `(0, 2)` int32
        NumPy array.
    """
    label_map = keras.ops.zeros((h, w), dtype="int32")

    for line_id, (poly, height) in enumerate(zip(polys, heights)):
        if height > 0:
            shrinked_poly = shrink_polygan(poly, 1 - height * shrink)
            mask = fill_poly(shrinked_poly, (h, w))
            label_map = keras.ops.where(
                mask > 0,
                (line_id + 1) * keras.ops.ones_like(label_map),
                label_map,
            )

    # Single-argument `where` returns one index array per axis. Stacking
    # them on the last axis gives (N, 2) rows of (row, col), which is the
    # layout the emptiness check and the indexing below rely on.
    indices = keras.ops.stack(keras.ops.where(label_map > 0), axis=-1)
    if keras.ops.shape(indices)[0] == 0:
        return [np.zeros((0, 2), "int32")]

    label_map_flat = keras.ops.reshape(label_map, (-1,))
    flattened_indices = indices[..., 0] * w + indices[..., 1]
    region_labels = keras.ops.take(label_map_flat, flattened_indices)
    unique_labels, _ = tf.unique(region_labels)
    unique_labels = keras.ops.convert_to_tensor(unique_labels)

    regions_coords = []

    for label in unique_labels:
        region_idx = keras.ops.stack(
            keras.ops.where(label_map == label), axis=-1
        )
        # Swap (row, col) into (x, y) order.
        coords = keras.ops.stack(
            [region_idx[..., 1], region_idx[..., 0]], axis=-1
        )
        regions_coords.append(coords)

    return regions_coords

def get_coords_poly_projection(coords, poly):
    """Return, for each point, its nearest projection onto the polygon edges.

    Every point is projected onto every edge segment of `poly`, and the
    projection with the smallest distance to the point is kept.

    Args:
        coords: Array-like of points, one row per point.
        poly: Sequence of polygon vertices; consecutive vertices form the
            edges and the last vertex connects back to the first.

    Returns:
        Float32 tensor with one projected point per input point.
    """
    edge_starts = keras.ops.array(poly, dtype="float32")
    # Each edge ends at the next vertex; the last edge closes the polygon.
    edge_ends = keras.ops.concatenate(
        [
            keras.ops.array(poly[1:], dtype="float32"),
            keras.ops.array(poly[:1], dtype="float32"),
        ],
        axis=0,
    )
    # Insert an edge axis so points broadcast against all edges at once.
    points = keras.ops.expand_dims(
        keras.ops.array(coords, dtype="float32"), axis=1
    )

    candidates = project_point_to_segment(
        points,
        keras.ops.expand_dims(edge_starts, axis=0),
        keras.ops.expand_dims(edge_ends, axis=0),
        axis=2,
    )
    distances = keras.ops.norm(points - candidates, axis=2)

    nearest_edge = keras.ops.argmin(distances, axis=1)
    # Two trailing singleton axes let the index select along the edge axis.
    gather_index = keras.ops.expand_dims(
        keras.ops.expand_dims(nearest_edge, axis=-1), axis=-1
    )
    picked = keras.ops.take_along_axis(candidates, gather_index, axis=1)
    return picked[:, 0, :]

def get_coords_poly_distance_keras(coords, poly):
    """Return the distance from each point to the polygon outline.

    Args:
        coords: Array-like of points, one row per point.
        poly: Sequence of polygon vertices.

    Returns:
        Tensor with one distance per point: the norm of the offset between
        the point and its nearest projection onto the polygon edges.
    """
    projection = get_coords_poly_projection(coords, poly)
    # Convert explicitly so the subtraction does not depend on how a backend
    # mixes a float32 tensor with a plain list or an integer NumPy array.
    points = keras.ops.convert_to_tensor(coords, dtype="float32")
    return keras.ops.linalg.norm(projection - points, axis=1)

def get_normalized_weight(heatmap, mask, background_weight=3.0):
    """Compute per-pixel weights that balance positives against negatives.

    Pixels with `heatmap >= 0.5` are positives and the rest are negatives;
    both sets are restricted to pixels enabled in `mask`. Negatives receive
    `background_weight`, and positives receive a smoothed negative-to-positive
    count ratio.

    Args:
        heatmap: Array of scores compared against 0.5.
        mask: Array combined with both sets through a logical AND.
        background_weight: Weight written to the negative pixels.

    Returns:
        NumPy array created with `np.zeros_like(heatmap)` and filled with the
        weights; pixels outside `mask` stay 0.
    """
    above = keras.ops.greater_equal(heatmap, 0.5)
    below = keras.ops.ones_like(above, dtype="float32") - keras.ops.cast(
        above, dtype="float32"
    )
    positives = keras.ops.logical_and(above, mask)
    negatives = keras.ops.logical_and(below, mask)

    num_pos = keras.ops.cast(keras.ops.sum(positives), dtype="float32")
    num_neg = keras.ops.cast(keras.ops.sum(negatives), dtype="float32")
    # The smoothing term is 5% of the counted pixels (plus one).
    smooth = (num_pos + num_neg + 1) * 0.05
    positive_weight = (num_neg + smooth) / (num_pos + smooth)

    weight = np.zeros_like(heatmap)
    weight[keras.ops.cast(negatives, dtype="bool")] = background_weight
    weight[keras.ops.cast(positives, dtype="bool")] = positive_weight
    return weight

108 changes: 108 additions & 0 deletions keras_hub/src/utils/diffbin/db_utils_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import os
import unittest
import numpy as np
import keras
import tensorflow as tf

from .db_utils import Polygon, shrink_polygan, binary_search_smallest_width, project_point_to_line, project_point_to_segment, get_line_height, line_segment_intersection, fill_poly, get_mask, get_region_coordinate, get_coords_poly_projection, get_coords_poly_distance_keras, get_normalized_weight


class TestdbUtils(unittest.TestCase):
    """Unit tests for the Differential Binarization helpers in `db_utils`.

    Tensor results are converted with `keras.ops.convert_to_numpy` rather
    than a backend-specific `.numpy()` method.
    """

    def test_Polygon(self):
        # Unit square.
        coords = np.array([[0, 0], [1, 0], [1, 1], [0, 1]])
        area = keras.ops.convert_to_numpy(Polygon(coords))
        self.assertAlmostEqual(float(area), 1.0)

        # Right triangle with legs of length 2.
        coords = np.array([[0, 0], [2, 0], [2, 2]])
        area = keras.ops.convert_to_numpy(Polygon(coords))
        self.assertAlmostEqual(float(area), 2.0)

    def test_shrink_polygan(self):
        poly = np.array([[0, 0], [2, 0], [2, 2], [0, 2]])
        factor = 0.4
        shrinked_poly = keras.ops.convert_to_numpy(
            shrink_polygan(poly, factor)
        )
        # The centroid is (1, 1); every corner offset of +/-1 is scaled by
        # 0.4, so the corners land on 0.6 and 1.4.
        expected = np.array([[0.6, 0.6], [1.4, 0.6], [1.4, 1.4], [0.6, 1.4]])
        np.testing.assert_array_almost_equal(shrinked_poly, expected)

    def test_binary_search_smallest_width(self):
        poly = np.array([[0, 0], [2, 0], [2, 2], [0, 2]])
        height = binary_search_smallest_width(poly)
        self.assertIsInstance(height, int)
        self.assertTrue(height >= 0)

    def test_project_point_to_line(self):
        x = np.array([1, 1])
        u = np.array([0, 0])
        v = np.array([2, 2])
        projected = keras.ops.convert_to_numpy(project_point_to_line(x, u, v))
        expected = np.array([1, 1])
        np.testing.assert_array_almost_equal(projected, expected)

    def test_project_point_to_segment(self):
        x = np.array([1, 1])
        u = np.array([0, 0])
        v = np.array([2, 2])
        projected = keras.ops.convert_to_numpy(
            project_point_to_segment(x, u, v)
        )
        expected = np.array([1, 1])
        np.testing.assert_array_almost_equal(projected, expected)

        # A point past the end of the segment clamps to the nearer endpoint.
        x = np.array([3, 3])
        projected = keras.ops.convert_to_numpy(
            project_point_to_segment(x, u, v)
        )
        expected = np.array([2, 2])
        np.testing.assert_array_almost_equal(projected, expected)

    def test_get_line_height(self):
        poly = np.array([[0, 0], [2, 0], [2, 2], [0, 2]])
        height = get_line_height(poly)
        self.assertIsInstance(height, int)
        self.assertTrue(height >= 0)

    def test_line_segment_intersection(self):
        polygon = np.array([[0, 0], [2, 0], [2, 2], [0, 2]])
        self.assertTrue(line_segment_intersection(1, 1, polygon))
        self.assertFalse(line_segment_intersection(3, 3, polygon))

    def test_fill_poly(self):
        vertices = np.array([[0, 0], [2, 0], [2, 2], [0, 2]])
        image_shape = (4, 4)
        mask = keras.ops.convert_to_numpy(fill_poly(vertices, image_shape))
        self.assertEqual(mask.shape, image_shape)
        self.assertTrue(np.any(mask))

    def test_get_mask(self):
        w, h = 4, 4
        polys = [[[0, 0], [2, 0], [2, 2], [0, 2]]]
        ignores = [False]
        mask = keras.ops.convert_to_numpy(get_mask(w, h, polys, ignores))
        self.assertEqual(mask.shape, (h, w))
        self.assertTrue(np.any(mask))

    def test_get_region_coordinate(self):
        w, h = 4, 4
        polys = [[[0, 0], [2, 0], [2, 2], [0, 2]]]
        heights = [1]
        shrink = 0.1
        regions = get_region_coordinate(w, h, polys, heights, shrink)
        self.assertTrue(isinstance(regions, list))

    def test_get_coords_poly_projection(self):
        coords = np.array([[1, 1], [3, 3]])
        poly = np.array([[0, 0], [2, 0], [2, 2], [0, 2]])
        projected = keras.ops.convert_to_numpy(
            get_coords_poly_projection(coords, poly)
        )
        self.assertEqual(projected.shape, coords.shape)

    def test_get_coords_poly_distance_keras(self):
        coords = np.array([[1, 1], [3, 3]])
        poly = np.array([[0, 0], [2, 0], [2, 2], [0, 2]])
        distances = keras.ops.convert_to_numpy(
            get_coords_poly_distance_keras(coords, poly)
        )
        self.assertEqual(distances.shape, (2,))

    def test_get_normalized_weight(self):
        heatmap = np.array([[0.1, 0.6], [0.8, 0.2]])
        mask = np.array([[1, 1], [1, 1]])
        weight = get_normalized_weight(heatmap, mask)
        self.assertEqual(weight.shape, heatmap.shape)

# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()