11
11
from modules .processing import create_infotext , process_images , Processed
12
12
from modules .shared import opts , cmd_opts , state , Options
13
13
from modules import script_callbacks
14
+ from numba import njit
14
15
from torchvision .transforms import Compose , transforms
15
16
from PIL import Image
16
17
from pathlib import Path
@@ -85,14 +86,17 @@ def ui(self, is_img2img):
85
86
with gr .Row ():
86
87
stereo_ipd = gr .Slider (minimum = 5 , maximum = 7.5 , step = 0.1 , label = 'IPD (cm)' , value = 6.4 )
87
88
stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
89
+ with gr .Row ():
90
+ stereo_fill = gr .Checkbox (label = "Improve accuracy" , value = False )
91
+ stereo_balance = gr .Slider (minimum = - 1.0 , maximum = 1.0 , step = 0.05 , label = 'Balance between eyes' , value = 0.0 )
88
92
89
93
with gr .Box ():
90
94
gr .HTML ("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>" )
91
95
92
- return [compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size ]
96
+ return [compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ]
93
97
94
98
# run from script in txt2img or img2img
95
- def run (self , p , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size ):
99
+ def run (self , p , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ):
96
100
97
101
# sd process
98
102
processed = processing .process_images (p )
@@ -106,13 +110,13 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
106
110
continue
107
111
inputimages .append (processed .images [count ])
108
112
109
- newmaps = run_depthmap (processed , p .outpath_samples , inputimages , None , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size )
113
+ newmaps = run_depthmap (processed , p .outpath_samples , inputimages , None , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance )
110
114
for img in newmaps :
111
115
processed .images .append (img )
112
116
113
117
return processed
114
118
115
- def run_depthmap (processed , outpath , inputimages , inputnames , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size ):
119
+ def run_depthmap (processed , outpath , inputimages , inputnames , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ):
116
120
117
121
# unload sd model
118
122
shared .sd_model .cond_stage_model .to (devices .cpu )
@@ -331,14 +335,20 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
331
335
if gen_stereo or gen_anaglyph :
332
336
print ("Generating Stereo image.." )
333
337
#img_output = cv2.blur(img_output, (3, 3))
334
- left_img = np .asarray (inputimages [count ])
335
- right_img = generate_stereo (left_img , img_output , stereo_ipd , stereo_size )
336
- stereo_img = np .hstack ([right_img , inputimages [count ]])
338
+ deviation = calculate_total_deviation (stereo_ipd , stereo_size , inputimages [count ].width )
339
+ balance = (stereo_balance + 1 ) / 2
340
+ original_image = np .asarray (inputimages [count ])
341
+ left_image = original_image if balance < 0.001 else \
342
+ apply_stereo_deviation (original_image , img_output , - deviation * balance , stereo_fill )
343
+ right_image = original_image if balance > 0.999 else \
344
+ apply_stereo_deviation (original_image , img_output , deviation * (1 - balance ), stereo_fill )
345
+ stereo_img = np .hstack ([left_image , right_image ])
346
+
337
347
if gen_stereo :
338
348
outimages .append (stereo_img )
339
349
if gen_anaglyph :
340
350
print ("Generating Anaglyph image.." )
341
- anaglyph_img = overlap (right_img , left_img )
351
+ anaglyph_img = overlap (left_image , right_image )
342
352
outimages .append (anaglyph_img )
343
353
if (processed is not None ):
344
354
if gen_stereo :
@@ -375,45 +385,82 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
375
385
376
386
return outimages
377
387
388
def calculate_total_deviation(ipd, monitor_w, image_width):
    """Return the total horizontal shift to split between the two eye views.

    The value is ``ipd * 0.12 * monitor_w * (image_width / 1920)`` and is
    also echoed to stdout.

    Args:
        ipd: value of the 'IPD (cm)' slider.
        monitor_w: value of the 'Screen Width (cm)' slider.
        image_width: width of the source image (the caller passes the
            image's ``.width``).

    Returns:
        The deviation as a float.
        NOTE(review): presumably a pixel count, since the caller uses it as
        a column offset - confirm.
    """
    # Scale relative to a 1920-wide reference image.
    width_ratio = image_width / 1920
    total_shift = ipd * 0.12 * monitor_w * width_ratio
    print("deviation:", total_shift)
    return total_shift
378
393
394
def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
    """Run ``apply_stereo_deviation_core`` and print a timestamp around it.

    All four arguments are forwarded unchanged and the core function's
    result is returned as-is. The current ``time.time()`` value is printed
    once before and once after the call.
    """
    from time import time as now

    print("TIME:", now())
    shifted = apply_stereo_deviation_core(original_image, depth, deviation, fill_technique)
    print("TIME:", now())
    return shifted
379
400
380
- def generate_stereo (left_img , depth , ipd , monitor_w ):
381
- #MONITOR_W = 38.5 #50 #38.5
382
- h , w , c = left_img .shape
401
+ @njit
402
def apply_stereo_deviation_core(original_image, depth, deviation, fill_technique):
    """Shift pixels horizontally by a depth-dependent amount and fill the gaps.

    Each source pixel at ``(row, col)`` is copied to column
    ``col + int((1 - d ** 2) * deviation)``, where ``d`` is the depth value
    normalised to ``[0, 1]``. Pixels that land outside the image are dropped.
    Destination pixels that received nothing are then filled according to
    ``fill_technique``.

    This function is decorated with numba's ``njit`` in this file, so the
    body sticks to constructs that work on plain NumPy arrays.

    Args:
        original_image: array of shape ``(h, w, c)``.
            NOTE(review): output pixels of the soft fill are cast to uint8,
            so a uint8 image is presumably expected - confirm.
        depth: 2-D array indexed as ``depth[row][col]``.
        deviation: signed maximum shift; the sign selects the direction.
        fill_technique: ``2`` = soft_horizontal (linear blend between the
            nearest valid neighbours), ``1`` = hard_horizontal (copy the
            nearest written pixel within ``abs(int(deviation)) + 1``
            columns), anything else = no filling.
            NOTE(review): the script UI passes a checkbox bool here; ``True``
            compares equal to ``1`` and therefore selects hard_horizontal -
            confirm that is intended.

    Returns:
        A new ``(h, w, c)`` array with the shifted (and optionally filled)
        image.
    """
    h, w, c = original_image.shape

    depth_min = depth.min()
    depth_max = depth.max()
    depth_span = depth_max - depth_min
    # Normalise to [0, 1]. A flat depth map has a zero span: keep the
    # all-zero offsets instead of dividing by zero (which would feed NaN
    # into int() below).
    norm_depth = (depth - depth_min).astype(np.float64)
    if depth_span != 0:
        norm_depth = norm_depth / depth_span

    derived_image = np.zeros_like(original_image)
    # Flat (row * w + col) map of destination pixels that were written.
    filled = np.zeros(h * w, dtype=np.uint8)

    # Swipe order should ensure that pixels that are closer overwrite
    # (at their destination) pixels that are less close
    if deviation < 0:
        first_col, stop_col, col_step = 0, w, 1
    else:
        first_col, stop_col, col_step = w - 1, -1, -1

    for row in range(h):
        for col in range(first_col, stop_col, col_step):
            col_d = col + int((1 - norm_depth[row][col] ** 2) * deviation)
            if 0 <= col_d < w:
                derived_image[row][col_d] = original_image[row][col]
                filled[row * w + col_d] = 1

    # Fill the gaps
    if fill_technique == 2:  # soft_horizontal
        for row in range(h):
            for l_pointer in range(w):
                # A gap is a pixel that was never written and is still black;
                # pixels blended by an earlier iteration are non-zero and are
                # skipped by the second test.
                if filled[row * w + l_pointer] != 0 or derived_image[row][l_pointer].any():
                    continue

                # Borders are copied to float64 so the blend below never does
                # arithmetic in uint8.
                if l_pointer > 0:
                    l_border = derived_image[row][l_pointer - 1].astype(np.float64)
                else:
                    l_border = np.zeros(c, dtype=np.float64)

                # Scan right for the first written, non-black pixel.
                r_border = np.zeros(c, dtype=np.float64)
                r_pointer = l_pointer + 1
                while r_pointer < w:
                    if filled[row * w + r_pointer] != 0 and derived_image[row][r_pointer].any():
                        r_border = derived_image[row][r_pointer].astype(np.float64)
                        break
                    r_pointer += 1

                # An all-zero border counts as missing: fall back to the other.
                if not l_border.any():
                    l_border = r_border
                elif not r_border.any():
                    r_border = l_border

                total_steps = 1 + r_pointer - l_pointer
                step = (r_border - l_border) / total_steps
                for col in range(l_pointer, r_pointer):
                    # Truncate the signed offset in float and cast only the
                    # final value, which lies between the two borders, so no
                    # negative number is ever converted to uint8.
                    delta = np.trunc(step * (col - l_pointer + 1))
                    derived_image[row][col] = (l_border + delta).astype(np.uint8)
        return derived_image

    if fill_technique == 1:  # hard_horizontal
        derived_fix = np.copy(derived_image)
        max_offset = abs(int(deviation)) + 2
        for pos in np.where(filled == 0)[0]:
            row = pos // w
            col = pos % w
            # Look outwards, right neighbour first, for the nearest written pixel.
            for offset in range(1, max_offset):
                r_offset = col + offset
                l_offset = col - offset
                if r_offset < w and filled[row * w + r_offset]:
                    derived_fix[row][col] = derived_image[row][r_offset]
                    break
                if 0 <= l_offset and filled[row * w + l_offset]:
                    derived_fix[row][col] = derived_image[row][l_offset]
                    break
        return derived_fix

    # none
    return derived_image
417
464
418
465
def overlap (im1 , im2 ):
419
466
width1 = im1 .shape [1 ]
@@ -463,7 +510,9 @@ def run_generate(depthmap_mode,
463
510
gen_stereo ,
464
511
gen_anaglyph ,
465
512
stereo_ipd ,
466
- stereo_size
513
+ stereo_size ,
514
+ stereo_fill ,
515
+ stereo_balance
467
516
):
468
517
469
518
imageArr = []
@@ -500,7 +549,7 @@ def run_generate(depthmap_mode,
500
549
outpath = opts .outdir_samples or opts .outdir_extras_samples
501
550
502
551
503
- outputs = run_depthmap (None , outpath , imageArr , imageNameArr , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size )
552
+ outputs = run_depthmap (None , outpath , imageArr , imageNameArr , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance )
504
553
505
554
return outputs , plaintext_to_html ('info' ), ''
506
555
@@ -551,7 +600,10 @@ def on_ui_tabs():
551
600
gen_anaglyph = gr .Checkbox (label = "Generate Stereo anaglyph image (red/cyan)" ,value = False )
552
601
with gr .Row ():
553
602
stereo_ipd = gr .Slider (minimum = 5 , maximum = 7.5 , step = 0.1 , label = 'IPD (cm)' , value = 6.4 )
554
- stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
603
+ stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
604
+ with gr .Row ():
605
+ stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'hard_horizontal' , 'soft_horizontal' ], value = 'soft_horizontal' , type = "index" , elem_id = "stereo_fill_type" )
606
+ stereo_balance = gr .Slider (minimum = - 1.0 , maximum = 1.0 , step = 0.05 , label = 'Balance between eyes' , value = 0.0 )
555
607
556
608
with gr .Box ():
557
609
gr .HTML ("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>" )
@@ -590,7 +642,9 @@ def on_ui_tabs():
590
642
gen_stereo ,
591
643
gen_anaglyph ,
592
644
stereo_ipd ,
593
- stereo_size
645
+ stereo_size ,
646
+ stereo_fill ,
647
+ stereo_balance
594
648
],
595
649
outputs = [
596
650
result_images ,
@@ -1212,7 +1266,7 @@ def estimateboost(img, model, model_type, pix2pixmodel):
1212
1266
1213
1267
# Generate the base estimate using the double estimation.
1214
1268
whole_estimate = doubleestimate (img , net_receptive_field_size , whole_image_optimal_size , pix2pixsize , model , model_type , pix2pixmodel )
1215
-
1269
+
1216
1270
# Compute the multiplier described in section 6 of the main paper to make sure our initial patch can select
1217
1271
# small high-density regions of the image.
1218
1272
global factor
0 commit comments