Skip to content

Commit aa65528

Browse files
authored
Merge pull request #51 from semjon00/main
improvements for stereo image generation
2 parents 43e1f7a + e3b444d commit aa65528

File tree

1 file changed

+97
-43
lines changed

1 file changed

+97
-43
lines changed

scripts/depthmap.py

Lines changed: 97 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from modules.processing import create_infotext, process_images, Processed
1212
from modules.shared import opts, cmd_opts, state, Options
1313
from modules import script_callbacks
14+
from numba import njit
1415
from torchvision.transforms import Compose, transforms
1516
from PIL import Image
1617
from pathlib import Path
@@ -85,14 +86,17 @@ def ui(self, is_img2img):
8586
with gr.Row():
8687
stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
8788
stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
89+
with gr.Row():
90+
stereo_fill = gr.Checkbox(label="Improve accuracy", value=False)
91+
stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)
8892

8993
with gr.Box():
9094
gr.HTML("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
9195

92-
return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size]
96+
return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance]
9397

9498
# run from script in txt2img or img2img
95-
def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size):
99+
def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance):
96100

97101
# sd process
98102
processed = processing.process_images(p)
@@ -106,13 +110,13 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
106110
continue
107111
inputimages.append(processed.images[count])
108112

109-
newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size)
113+
newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance)
110114
for img in newmaps:
111115
processed.images.append(img)
112116

113117
return processed
114118

115-
def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size):
119+
def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance):
116120

117121
# unload sd model
118122
shared.sd_model.cond_stage_model.to(devices.cpu)
@@ -331,14 +335,20 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
331335
if gen_stereo or gen_anaglyph:
332336
print("Generating Stereo image..")
333337
#img_output = cv2.blur(img_output, (3, 3))
334-
left_img = np.asarray(inputimages[count])
335-
right_img = generate_stereo(left_img, img_output, stereo_ipd, stereo_size)
336-
stereo_img = np.hstack([right_img, inputimages[count]])
338+
deviation = calculate_total_deviation(stereo_ipd, stereo_size, inputimages[count].width)
339+
balance = (stereo_balance + 1) / 2
340+
original_image = np.asarray(inputimages[count])
341+
left_image = original_image if balance < 0.001 else \
342+
apply_stereo_deviation(original_image, img_output, - deviation * balance, stereo_fill)
343+
right_image = original_image if balance > 0.999 else \
344+
apply_stereo_deviation(original_image, img_output, deviation * (1 - balance), stereo_fill)
345+
stereo_img = np.hstack([left_image, right_image])
346+
337347
if gen_stereo:
338348
outimages.append(stereo_img)
339349
if gen_anaglyph:
340350
print("Generating Anaglyph image..")
341-
anaglyph_img = overlap(right_img, left_img)
351+
anaglyph_img = overlap(left_image, right_image)
342352
outimages.append(anaglyph_img)
343353
if (processed is not None):
344354
if gen_stereo:
@@ -375,45 +385,82 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
375385

376386
return outimages
377387

388+
def calculate_total_deviation(ipd, monitor_w, image_width):
    """Return the total horizontal deviation used for stereo generation.

    The value is later scaled per pixel by the depth map and added to a
    column index, so it acts as the maximum column shift between the views.

    Args:
        ipd: Interpupillary distance (the UI slider labels it in cm).
        monitor_w: Screen width (the UI slider labels it in cm).
        image_width: Width of the source image in pixels.

    Returns:
        The deviation, scaled linearly with ``image_width`` relative to a
        1920-pixel-wide reference.
    """
    # NOTE(review): 0.12 and 1920 look like empirical tuning constants
    # inherited from the previous implementation - confirm before changing.
    width_ratio = image_width / 1920
    total_shift = ipd * 0.12 * monitor_w * width_ratio
    print("deviation:", total_shift)
    return total_shift
378393

394+
def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
    """Shift ``original_image`` horizontally according to ``depth``.

    Thin wrapper around ``apply_stereo_deviation_core`` that prints a
    wall-clock timestamp immediately before and after the call so the
    duration of the core routine can be read off the console.

    Args:
        original_image: Source image array, passed through unchanged.
        depth: Depth map array, passed through unchanged.
        deviation: Signed maximum column shift, passed through unchanged.
        fill_technique: Gap-fill selector, passed through unchanged.

    Returns:
        Whatever ``apply_stereo_deviation_core`` returns.
    """
    from time import time as current_time

    print("TIME:", current_time())
    shifted_image = apply_stereo_deviation_core(
        original_image, depth, deviation, fill_technique
    )
    print("TIME:", current_time())
    return shifted_image
379400

380-
def generate_stereo(left_img, depth, ipd, monitor_w):
381-
#MONITOR_W = 38.5 #50 #38.5
382-
h, w, c = left_img.shape
401+
@njit
def apply_stereo_deviation_core(original_image, depth, deviation, fill_technique):
    """Build one stereo view by shifting pixels horizontally by depth.

    Each source pixel is moved along its row by
    ``int((1 - d ** 2) * deviation)`` columns, where ``d`` is the depth map
    normalised to the range [0, 1]. Destination pixels that receive no
    source pixel are gaps, optionally filled according to ``fill_technique``.

    Args:
        original_image: Array of shape (height, width, channels).
            Assumed to be uint8 image data - TODO confirm against callers.
        depth: 2-D depth map indexed as ``depth[row][col]`` with the same
            height and width as ``original_image``.
        deviation: Signed maximum shift in columns; the sign selects the
            shift direction and the sweep order.
        fill_technique: 0 (or any other value) leaves gaps black,
            1 copies the nearest shifted pixel in the row ("hard_horizontal"),
            2 linearly blends between the gap's borders ("soft_horizontal").

    Returns:
        A new array shaped like ``original_image`` holding the shifted view.
    """
    h, w, c = original_image.shape

    # Normalise the depth map to [0, 1]. A completely flat map would divide
    # by zero and turn every shift into NaN (producing an all-black image),
    # so treat it as constant depth 0, i.e. a uniform shift.
    depth_min = depth.min()
    depth_range = float(depth.max() - depth_min)
    if depth_range == 0.0:
        depth_range = 1.0
    norm_depth = (depth - depth_min) / depth_range

    derived_image = np.zeros_like(original_image)
    # Flat (row * w + col) mask of destination pixels that were written.
    filled = np.zeros(h * w, dtype=np.uint8)

    for row in range(h):
        # Sweep order should ensure that pixels that are closer overwrite
        # (at their destination) pixels that are less close
        for col in range(w) if deviation < 0 else range(w - 1, -1, -1):
            col_d = col + int((1 - norm_depth[row][col] ** 2) * deviation)
            if 0 <= col_d < w:
                derived_image[row][col_d] = original_image[row][col]
                filled[row * w + col_d] = 1

    # Fill the gaps
    if fill_technique == 2:  # soft_horizontal
        for row in range(h):
            for l_pointer in range(w):
                # A gap pixel is black AND was never written. This test and
                # the border test below are deliberately not exact mirrors:
                # a written-but-black pixel is neither a gap nor a border.
                if sum(derived_image[row][l_pointer]) != 0 or filled[row * w + l_pointer]:
                    continue
                # Use the real channel count rather than a hard-coded 3 so
                # the placeholder always matches the pixel shape.
                l_border = derived_image[row][l_pointer - 1] if l_pointer > 0 else np.zeros(c, dtype=np.uint8)
                r_border = np.zeros(c, dtype=np.uint8)
                # Scan right for the first written, non-black pixel.
                r_pointer = l_pointer + 1
                while r_pointer != w:
                    if sum(derived_image[row][r_pointer]) != 0 and filled[row * w + r_pointer]:
                        r_border = derived_image[row][r_pointer]
                        break
                    r_pointer += 1
                # If one border is missing (black), extend the other one.
                if sum(l_border) == 0:
                    l_border = r_border
                elif sum(r_border) == 0:
                    r_border = l_border
                total_steps = 1 + r_pointer - l_pointer
                # Interpolate entirely in float64 and convert once at the end:
                # casting a negative float offset to uint8 is not well defined,
                # and np.float_ no longer exists in NumPy 2.0.
                l_float = l_border.astype(np.float64)
                step = (r_border.astype(np.float64) - l_float) / total_steps
                for col in range(l_pointer, r_pointer):
                    derived_image[row][col] = (l_float + step * (col - l_pointer + 1)).astype(np.uint8)
        return derived_image
    elif fill_technique == 1:  # hard_horizontal
        # Read from derived_image and write to a copy so that filled-in
        # pixels never act as sources for other gaps.
        derived_fix = np.copy(derived_image)
        for pos in np.where(filled == 0)[0]:
            row = pos // w
            col = pos % w
            # Search outwards, right side first, for the nearest written pixel.
            for offset in range(1, abs(int(deviation)) + 2):
                r_offset = col + offset
                l_offset = col - offset
                if r_offset < w and filled[row * w + r_offset]:
                    derived_fix[row][col] = derived_image[row][r_offset]
                    break
                if 0 <= l_offset and filled[row * w + l_offset]:
                    derived_fix[row][col] = derived_image[row][l_offset]
                    break
        return derived_fix
    else:  # none
        return derived_image
417464

418465
def overlap(im1, im2):
419466
width1 = im1.shape[1]
@@ -463,7 +510,9 @@ def run_generate(depthmap_mode,
463510
gen_stereo,
464511
gen_anaglyph,
465512
stereo_ipd,
466-
stereo_size
513+
stereo_size,
514+
stereo_fill,
515+
stereo_balance
467516
):
468517

469518
imageArr = []
@@ -500,7 +549,7 @@ def run_generate(depthmap_mode,
500549
outpath = opts.outdir_samples or opts.outdir_extras_samples
501550

502551

503-
outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size)
552+
outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance)
504553

505554
return outputs, plaintext_to_html('info'), ''
506555

@@ -551,7 +600,10 @@ def on_ui_tabs():
551600
gen_anaglyph = gr.Checkbox(label="Generate Stereo anaglyph image (red/cyan)",value=False)
552601
with gr.Row():
553602
stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
554-
stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
603+
stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
604+
with gr.Row():
605+
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'hard_horizontal', 'soft_horizontal'], value='soft_horizontal', type="index", elem_id="stereo_fill_type")
606+
stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)
555607

556608
with gr.Box():
557609
gr.HTML("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
@@ -590,7 +642,9 @@ def on_ui_tabs():
590642
gen_stereo,
591643
gen_anaglyph,
592644
stereo_ipd,
593-
stereo_size
645+
stereo_size,
646+
stereo_fill,
647+
stereo_balance
594648
],
595649
outputs=[
596650
result_images,
@@ -1212,7 +1266,7 @@ def estimateboost(img, model, model_type, pix2pixmodel):
12121266

12131267
# Generate the base estimate using the double estimation.
12141268
whole_estimate = doubleestimate(img, net_receptive_field_size, whole_image_optimal_size, pix2pixsize, model, model_type, pix2pixmodel)
1215-
1269+
12161270
# Compute the multiplier described in section 6 of the main paper to make sure our initial patch can select
12171271
# small high-density regions of the image.
12181272
global factor

0 commit comments

Comments
 (0)