1+ import numpy as np
2+ import cv2
3+ import ast
4+ from PIL import Image , ImageDraw , ImageFont
5+
# General-purpose palette of 14 RGB colours.
color_rgb = [
    (255, 255, 0),
    (255, 128, 0),
    (128, 255, 0),
    (0, 128, 255),
    (0, 0, 255),
    (127, 0, 255),
    (255, 0, 255),
    (255, 0, 127),
    (255, 0, 0),
    (255, 204, 153),
    (255, 102, 102),
    (153, 255, 153),
    (153, 153, 255),
    (0, 0, 153),
]
# The same palette with an alpha channel of 70 appended to every entry.
color_rgba = [(*rgb, 70) for rgb in color_rgb]

# Hand colours, indexed by side (0 = left, 1 = right; see side_map2).
hand_rgb = [(0, 90, 181), (220, 50, 32)]
hand_rgba = [(*rgb, 70) for rgb in hand_rgb]

# Single colour used for every object box.
obj_rgb = (255, 194, 10)
obj_rgba = (*obj_rgb, 70)

# Side labels: keyed by letter, then by index (long and short forms).
side_map = {'l': 'Left', 'r': 'Right'}
side_map2 = {0: 'Left', 1: 'Right'}
side_map3 = {0: 'L', 1: 'R'}

# Contact-state labels keyed by state index (long and one-letter forms).
state_map = {
    0: 'No Contact',
    1: 'Self Contact',
    2: 'Another Person',
    3: 'Portable Object',
    4: 'Stationary Object',
}
state_map2 = {0: 'N', 1: 'S', 2: 'O', 3: 'P', 4: 'F'}

# Drawing parameters shared by all the draw_* helpers below.
# The *_alpha entries are passed as the rectangle ``fill``; None leaves the
# box interior unfilled.
vis_settings = {
    'font_size': 20,
    'line_width': 2,
    'point_radius': 4,
    'hand_color': hand_rgb,
    'hand_alpha': [None, None],
    'obj_color': obj_rgb,
    'obj_alpha': None,
    'text_alpha': (255, 255, 255, 255),
}
23+
def calculate_center(bb):
    """Return the midpoint of a bounding box as a two-element list.

    Args:
        bb: Indexable box whose elements 0 and 2 are averaged into the first
            coordinate and elements 1 and 3 into the second (callers pass
            ``[x0, y0, x1, y1]``-style boxes).

    Returns:
        ``[center_x, center_y]``.
    """
    x0, y0, x1, y1 = bb[0], bb[1], bb[2], bb[3]
    center_x = (x0 + x1) / 2
    center_y = (y0 + y1) / 2
    return [center_x, center_y]
26+
def filter_object(obj_dets, hand_dets):
    """Pick, for every hand, the index of the object it most likely touches.

    For each hand whose contact state (column 5) is positive, a target point
    is computed as the hand box centre plus an offset built from columns 6-8,
    and the object whose box centre is closest to that point is selected.

    Args:
        obj_dets: 2-D array of object detections; columns 0-3 hold the box.
        hand_dets: 2-D array of hand detections; columns 0-3 hold the box,
            column 5 the contact state, and columns 6, 7, 8 are presumably an
            offset magnitude and its x/y direction -- TODO confirm against
            the detector that produces these rows.

    Returns:
        A list with one ``int`` per hand row: the index into ``obj_dets`` of
        the matched object, or ``-1`` when the hand is not in contact or when
        there are no objects to match against.
    """
    # Object box centres, shape (num_objects, 2); skipped when there are no
    # objects so that an empty array never reaches argmin below.
    if obj_dets.shape[0] > 0:
        object_centers = (obj_dets[:, 0:2] + obj_dets[:, 2:4]) / 2
    else:
        object_centers = None

    img_obj_id = []
    for i in range(hand_dets.shape[0]):
        if hand_dets[i, 5] <= 0 or object_centers is None:
            img_obj_id.append(-1)
            continue
        hand_center = (hand_dets[i, 0:2] + hand_dets[i, 2:4]) / 2
        # The 10000 scale factor is hard-coded; it stretches the offset
        # before it is added to the hand centre.
        target = hand_center + hand_dets[i, 6] * 10000 * hand_dets[i, 7:9]
        squared_dist = np.sum((object_centers - target) ** 2, axis=1)
        img_obj_id.append(int(np.argmin(squared_dist)))
    return img_obj_id
44+
def draw_obj_mask(image, draw, obj_idx, obj_bbox, obj_score, width, height):
    """Draw an object's bounding box and its 'O' label tag onto ``image``.

    Args:
        image: PIL image that receives the box overlay (modified in place).
        draw: Drawing context used for the label tag and its text.
        obj_idx: Not used by this function.
        obj_bbox: Box passed straight to ``rectangle``; elements 0 and 1 also
            position the label tag.
        obj_score: Not used by this function.
        width: Width of the temporary overlay image.
        height: Height of the temporary overlay image.

    Returns:
        The same ``image`` object that was passed in.
    """
    font_size = vis_settings['font_size']
    line_width = vis_settings['line_width']
    outline_color = vis_settings['obj_color']
    label_font = ImageFont.truetype('llava/action/times_b.ttf', size=font_size)

    # The box is drawn on a separate RGBA overlay, which is then pasted onto
    # the image using itself as the paste mask.
    overlay = Image.new('RGBA', (width, height))
    overlay_draw = ImageDraw.Draw(overlay)
    overlay_draw.rectangle(
        obj_bbox,
        outline=outline_color,
        width=line_width,
        fill=vis_settings['obj_alpha'],
    )
    image.paste(overlay, (0, 0), overlay)

    # The label tag sits just above the box, clamped so it never starts
    # above the top edge of the image.
    tag_left = obj_bbox[0]
    tag_top = max(0, obj_bbox[1] - font_size)
    draw.rectangle(
        [tag_left, tag_top, tag_left + font_size + 2, tag_top + font_size],
        fill=vis_settings['text_alpha'],
        outline=outline_color,
        width=line_width,
    )
    draw.text((tag_left + 5, tag_top - 2), 'O', font=label_font, fill=(0, 0, 0))

    return image
58+
def draw_hand_mask(image, draw, hand_idx, hand_bbox, hand_score, side, state, width, height):
    """Draw a hand's bounding box and its side/state label tag onto ``image``.

    The tag text is ``<side>-<state>`` using the one-letter codes from
    ``side_map3`` and ``state_map2`` (for example ``L-P``).

    Args:
        image: PIL image that receives the box overlay (modified in place).
        draw: Accepted for signature compatibility; the label is drawn with
            a fresh drawing context created on ``image``.
        hand_idx: Not used by this function.
        hand_bbox: Box passed straight to ``rectangle``; elements 0 and 1
            also position the label tag.
        hand_score: Not used by this function.
        side: 0 for the left hand, 1 for the right hand (numeric values that
            compare equal to 0 or 1 are accepted).
        state: Contact-state index; converted with ``int(float(state))`` and
            looked up in ``state_map2``.
        width: Width of the temporary overlay image.
        height: Height of the temporary overlay image.

    Returns:
        The same ``image`` object that was passed in.

    Raises:
        ValueError: If ``side`` is neither 0 nor 1.
    """
    # Validate up front: previously any other value left the colour index
    # unassigned and failed later with an unrelated-looking error.
    if side == 0:
        side_idx = 0
    elif side == 1:
        side_idx = 1
    else:
        raise ValueError(f'side must be 0 (left) or 1 (right), got {side!r}')

    font_size = vis_settings['font_size']
    line_width = vis_settings['line_width']
    side_color = vis_settings['hand_color'][side_idx]
    font = ImageFont.truetype('llava/action/times_b.ttf', size=font_size)

    # The box is drawn on a separate RGBA overlay, which is then pasted onto
    # the image using itself as the paste mask.
    mask = Image.new('RGBA', (width, height))
    pmask = ImageDraw.Draw(mask)
    pmask.rectangle(
        hand_bbox,
        outline=side_color,
        width=line_width,
        fill=vis_settings['hand_alpha'][side_idx],
    )
    image.paste(mask, (0, 0), mask)

    # Label tag: twice as wide as the font size, placed just above the box
    # and clamped so it never starts above the top edge of the image.
    draw = ImageDraw.Draw(image)
    tag_left = hand_bbox[0]
    tag_top = max(0, hand_bbox[1] - font_size)
    draw.rectangle(
        [tag_left, tag_top, tag_left + font_size * 2 + 2, tag_top + font_size],
        fill=vis_settings['text_alpha'],
        outline=side_color,
        width=line_width,
    )
    label = f'{side_map3[side_idx]}-{state_map2[int(float(state))]}'
    draw.text((tag_left + 6, tag_top - 2), label, font=font, fill=(0, 0, 0))

    return image
78+
def draw_line_point(draw, side_idx, hand_center, object_center):
    """Connect a hand centre to an object centre with a line and two dots.

    The line and the hand-end dot use the hand colour selected by
    ``side_idx``; the object-end dot uses the object colour.

    Args:
        draw: Drawing context to draw on.
        side_idx: Index into ``vis_settings['hand_color']``.
        hand_center: Point whose elements 0 and 1 are the hand coordinates.
        object_center: Point whose elements 0 and 1 are the object coordinates.
    """
    hand_color = vis_settings['hand_color'][side_idx]
    draw.line(
        [hand_center, object_center],
        fill=hand_color,
        width=vis_settings['line_width'],
    )

    radius = vis_settings['point_radius']
    # Hand dot first, then object dot, each a filled circle around its centre.
    endpoints = (
        (hand_center, hand_color),
        (object_center, vis_settings['obj_color']),
    )
    for center, dot_color in endpoints:
        cx, cy = center[0], center[1]
        draw.ellipse(
            (cx - radius, cy - radius, cx + radius, cy + radius),
            fill=dot_color,
        )
87+
def vis_detections_PIL(im, class_name, dets, thresh=0.8):
    """Draw hand boxes for detections whose score exceeds ``thresh``.

    Only the first 10 rows of ``dets`` are considered.

    Args:
        im: Array converted to a PIL image with ``Image.fromarray``.
        class_name: Not used by this function.
        dets: 2-D detection array; columns 0-3 are the box, column 4 the
            score, column 5 the contact state and the last column the side.
        thresh: Minimum score (exclusive) for a detection to be drawn.

    Returns:
        The annotated image in RGBA mode.
    """
    image = Image.fromarray(im).convert("RGBA")
    draw = ImageDraw.Draw(image)
    width, height = image.size

    shown = min(10, dets.shape[0])
    for idx in range(shown):
        # Box coordinates are rounded to whole pixels before drawing.
        box = [int(np.round(coord)) for coord in dets[idx, :4]]
        score = dets[idx, 4]
        side = dets[idx, -1]
        contact_state = dets[idx, 5]
        if not score > thresh:
            continue
        image = draw_hand_mask(image, draw, idx, box, score, side, contact_state, width, height)

    return image
104+
def vis_detections_filtered_objects_PIL(im, obj_dets, hand_dets, thresh_hand=0.8, thresh_obj=0.01):
    """Draw hands, the objects they are matched to, and hand-object links.

    The channel axis of ``im`` is reversed before it is converted to a PIL
    image. Only the first 10 object rows and the first 10 hand rows are drawn.

    Args:
        im: 3-D image array (indexed as ``im[:, :, ::-1]``).
        obj_dets: 2-D object detection array, or None.
        hand_dets: 2-D hand detection array, or None.
        thresh_hand: Minimum score (exclusive) for a hand to be drawn.
        thresh_obj: Minimum score (exclusive) for an object to be drawn.

    Returns:
        The annotated image in RGBA mode. With no hand detections the image
        is returned without annotations; with hands but no objects only the
        hands are drawn.
    """
    # convert to PIL
    im = im[:, :, ::-1]
    image = Image.fromarray(im).convert("RGBA")
    draw = ImageDraw.Draw(image)
    width, height = image.size

    if hand_dets is None:
        return image
    if obj_dets is None:
        return vis_detections_PIL(im, 'hand', hand_dets, thresh_hand)

    # One entry per hand: index of its matched object, or -1.
    img_obj_id = filter_object(obj_dets, hand_dets)

    # Objects: drawn only when some hand is matched to them.
    for obj_pos in range(min(10, obj_dets.shape[0])):
        obj_box = [int(np.round(coord)) for coord in obj_dets[obj_pos, :4]]
        obj_score = obj_dets[obj_pos, 4]
        if obj_score > thresh_obj and obj_pos in img_obj_id:
            image = draw_obj_mask(image, draw, obj_pos, obj_box, obj_score, width, height)

    # Hands, plus a connecting line for every hand that is in contact.
    for hand_pos in range(min(10, hand_dets.shape[0])):
        hand_box = [int(np.round(coord)) for coord in hand_dets[hand_pos, :4]]
        hand_score = hand_dets[hand_pos, 4]
        side = hand_dets[hand_pos, -1]
        contact_state = hand_dets[hand_pos, 5]
        if not hand_score > thresh_hand:
            continue

        image = draw_hand_mask(image, draw, hand_pos, hand_box, hand_score, side, contact_state, width, height)
        if not contact_state > 0:
            continue

        obj_center = calculate_center(obj_dets[img_obj_id[hand_pos], :4])
        hand_center = calculate_center(hand_box)
        # draw_hand_mask above only returns for side 0 or 1, so a two-way
        # choice is sufficient here.
        side_idx = 0 if side == 0 else 1
        draw_line_point(
            draw,
            side_idx,
            (int(hand_center[0]), int(hand_center[1])),
            (int(obj_center[0]), int(obj_center[1])),
        )

    return image
145+
def _parse_dets(dets):
    """Turn a detection argument into a NumPy array, or None when empty."""
    if dets is None:
        return None
    if isinstance(dets, str):
        # literal_eval only accepts Python literals, so the string cannot
        # execute arbitrary code.
        dets = ast.literal_eval(dets)
    parsed = np.array(dets)
    # Any empty result (e.g. from '[]', '[ ]' or an empty list) means
    # "no detections".
    return parsed if parsed.size else None


def render_frame(im, hand_dets, obj_dets, thresh_hand=0.5, thresh_obj=0.5):
    """Render hand/object detections onto a frame and return it as an array.

    The frame is converted from RGB to BGR before being handed to
    ``vis_detections_filtered_objects_PIL``, which reverses the channel axis
    again before drawing.

    Args:
        im: Input frame as an array in RGB channel order; it is copied, not
            modified.
        hand_dets: Hand detections as the string form of a nested list
            (e.g. ``'[[x0, y0, x1, y1, score, ...]]'``). ``None``, an empty
            list string, or an already-parsed list/array are also accepted.
        obj_dets: Object detections, in the same formats as ``hand_dets``.
        thresh_hand: Minimum score (exclusive) for a hand to be drawn.
        thresh_obj: Minimum score (exclusive) for an object to be drawn.

    Returns:
        The annotated RGBA image converted with ``np.array``.
    """
    im_show = cv2.cvtColor(im.copy(), cv2.COLOR_RGB2BGR)
    hand_arr = _parse_dets(hand_dets)
    obj_arr = _parse_dets(obj_dets)
    annotated = vis_detections_filtered_objects_PIL(im_show, obj_arr, hand_arr, thresh_hand, thresh_obj)
    return np.array(annotated)
0 commit comments