Skip to content

Commit aa4ef10

Browse files
committed
Single Video mode (Experimental)
1 parent e2f7da7 commit aa4ef10

File tree

6 files changed

+288
-46
lines changed

6 files changed

+288
-46
lines changed

scripts/depthmap.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import gradio as gr
33
from modules import shared
44
import modules.scripts as scripts
5+
from PIL import Image
56

67
from src import backbone
78
from src import common_ui
@@ -46,8 +47,8 @@ def run(self, p, *inputs):
4647
gen_obj = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, backbone.gather_ops())
4748

4849
for input_i, type, result in gen_obj:
49-
if type in ['simple_mesh', 'inpainted_mesh']:
50-
continue # We are in script mode: do nothing with the filenames
50+
if not isinstance(result, Image.Image):
51+
continue
5152

5253
# get generation parameters
5354
# TODO: could reuse

scripts/depthmap_api.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,9 @@ async def process(
6767

6868
results_based = []
6969
for count, type, result in gen_obj:
70-
if type not in ['simple_mesh', 'inpainted_mesh']:
71-
results_based += [encode_to_base64(result)]
70+
if not isinstance(result, Image.Image):
71+
continue
72+
results_based += [encode_to_base64(result)]
7273
return {"images": results_based, "info": "Success"}
7374

7475

src/common_constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ def __init__(self, default_value=None, *args):
2525
DO_OUTPUT_DEPTH = True
2626
OUTPUT_DEPTH_INVERT = False
2727
OUTPUT_DEPTH_COMBINE = False
28-
OUTPUT_DEPTH_COMBINE_AXIS = "Horizontal"
28+
OUTPUT_DEPTH_COMBINE_AXIS = "Horizontal" # Format (str) is subject to change
29+
DO_OUTPUT_DEPTH_PREDICTION = False # Hidden, do not use, subject to change
2930

3031
CLIPDEPTH = False
3132
CLIPDEPTH_FAR = 0.0

src/common_ui.py

Lines changed: 76 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import gradio as gr
44
from PIL import Image
55

6-
from src import backbone
6+
from src import backbone, video_mode
77
from src.core import core_generation_funnel, unload_models, run_makevideo
88
from src.depthmap_generation import ModelHolder
99
from src.gradio_args_transport import GradioComponentBundle
@@ -217,6 +217,33 @@ def open_folder_action():
217217
else:
218218
sp.Popen(["xdg-open", path])
219219

220+
221+
def depthmap_mode_video(inp):
    """Populate the 'Single Video' tab and register its components in the bundle.

    Adds, in order: the input video file picker, a checkbox for using a
    custom/pregenerated depthmap video together with its (initially hidden)
    file picker, and a row holding the compression checkbox and its
    (initially hidden) bitrate slider. Each checkbox toggles the visibility of
    the component it controls.

    :param inp: component bundle the new components are added to.
    :return: the bundle with the video-mode components added.
    """
    # Source video (or animated file) to process.
    inp += gr.File(elem_id='depthmap_input_video', label="Video or animated file",
                   file_count="single", interactive=True, type="file")

    # Optional custom depthmap video; the picker stays hidden until the checkbox is ticked.
    inp += gr.Checkbox(elem_id="depthmap_vm_custom_checkbox",
                       label="Use custom/pregenerated DepthMap video", value=False)
    inp += gr.File(elem_id='depthmap_vm_custom', file_count="single",
                   interactive=True, type="file", visible=False)

    # Optional compression; the bitrate slider stays hidden until the checkbox is ticked.
    with gr.Row():
        inp += gr.Checkbox(elem_id='depthmap_vm_compress_checkbox', label="Compress colorvideos?", value=False)
        inp += gr.Slider(elem_id='depthmap_vm_compress_bitrate', label="Bitrate (kbit)", visible=False,
                         minimum=1000, value=15000, maximum=50000, step=250)

    def bind_visibility(checkbox_key, target_key):
        # Show the target component only while the checkbox is ticked.
        def toggle(checked):
            return inp[target_key].update(visible=checked)

        inp[checkbox_key].change(
            fn=toggle,
            inputs=[inp[checkbox_key]],
            outputs=[inp[target_key]]
        )

    bind_visibility('depthmap_vm_custom_checkbox', 'depthmap_vm_custom')
    bind_visibility('depthmap_vm_compress_checkbox', 'depthmap_vm_compress_bitrate')

    return inp
246+
220247
def on_ui_tabs():
221248
inp = GradioComponentBundle()
222249
with gr.Blocks(analytics_enabled=False, title="DepthMap") as depthmap_interface:
@@ -248,6 +275,8 @@ def on_ui_tabs():
248275
label="Skip generation and use (edited/custom) depthmaps "
249276
"in output directory when a file already exists.",
250277
value=True)
278+
with gr.TabItem('Single Video') as depthmap_mode_3:
279+
inp = depthmap_mode_video(inp)
251280
submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary')
252281
inp |= main_ui_panel(True) # Main panel is inserted here
253282
unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels")
@@ -310,6 +339,7 @@ def on_ui_tabs():
310339
depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode'])
311340
depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode'])
312341
depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode'])
342+
depthmap_mode_3.select(lambda: '3', None, inp['depthmap_mode'])
313343

314344
def custom_depthmap_change_fn(turned_on):
315345
return inp['custom_depthmap_img'].update(visible=turned_on), \
@@ -369,6 +399,18 @@ def custom_depthmap_change_fn(turned_on):
369399
return depthmap_interface
370400

371401

402+
def format_exception(e: Exception):
    """Print the stacktrace of ``e`` and render it as an HTML error message.

    The message consists of a header with the exception text, a request to
    report the issue (omitted when the header mentions 'out of GPU memory'),
    and the full stacktrace inside a ``<code>`` element.

    The exception text and the stacktrace are HTML-escaped: stacktraces
    routinely contain fragments such as ``<module>`` which would otherwise be
    parsed as markup and disappear from the displayed message.

    :param e: the exception to report. Its own traceback is used, so the
        function also works when called outside of an ``except`` block.
    :return: HTML string describing the error.
    """
    import html  # local import: only needed for escaping here

    # Format from the exception object itself rather than from the implicit
    # "currently handled exception" state.
    stacktrace = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
    traceback.print_exception(type(e), e, e.__traceback__)

    msg = '<h3>' + 'ERROR: ' + html.escape(str(e)) + '</h3>' + '\n'
    if 'out of GPU memory' not in msg:
        msg += \
            'Please report this issue ' \
            f'<a href="https://github.com/thygate/{REPOSITORY_NAME}/issues">here</a>. ' \
            'Make sure to provide the full stacktrace: \n'
    msg += '<code style="white-space: pre;">' + html.escape(stacktrace) + '</code>'
    return msg
412+
413+
372414
def run_generate(*inputs):
373415
inputs = GradioComponentBundle.enkey_to_dict(inputs)
374416
depthmap_mode = inputs['depthmap_mode']
@@ -381,10 +423,21 @@ def run_generate(*inputs):
381423
custom_depthmap_img = inputs['custom_depthmap_img']
382424

383425
inputimages = []
384-
# Allow supplying custom depthmaps
385-
inputdepthmaps = []
386-
# Also keep track of original file names
387-
inputnames = []
426+
inputdepthmaps = [] # Allow supplying custom depthmaps
427+
inputnames = [] # Also keep track of original file names
428+
429+
if depthmap_mode == '3':
430+
try:
431+
custom_depthmap = inputs['depthmap_vm_custom'] \
432+
if inputs['depthmap_vm_custom_checkbox'] else None
433+
colorvids_bitrate = inputs['depthmap_vm_compress_bitrate'] \
434+
if inputs['depthmap_vm_compress_checkbox'] else None
435+
ret = video_mode.gen_video(
436+
inputs['depthmap_input_video'], backbone.get_outpath(), inputs, custom_depthmap, colorvids_bitrate)
437+
return [], None, None, ret
438+
except Exception as e:
439+
ret = format_exception(e)
440+
return [], None, None, ret
388441

389442
if depthmap_mode == '2' and depthmap_batch_output_dir != '':
390443
outpath = depthmap_batch_output_dir
@@ -410,7 +463,9 @@ def run_generate(*inputs):
410463
image = Image.open(os.path.abspath(img.name))
411464
inputimages.append(image)
412465
inputnames.append(os.path.splitext(img.orig_name)[0])
466+
print(f'{len(inputimages)} images will be processed')
413467
elif depthmap_mode == '2': # Batch from Directory
468+
# TODO: There is a RAM leak when we process batches, I can smell it! Or maybe it is gone.
414469
assert not backbone.get_cmd_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled'
415470
if depthmap_batch_input_dir == '':
416471
return [], None, None, "Please select an input directory."
@@ -444,40 +499,40 @@ def run_generate(*inputs):
444499

445500
gen_obj = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops())
446501

447-
show_images = []
502+
# Saving images
503+
img_results = []
504+
results_total = 0
448505
inpainted_mesh_fi = mesh_simple_fi = None
449506
msg = "" # Empty string is never returned
450507
while True:
451508
try:
452509
input_i, type, result = next(gen_obj)
510+
results_total += 1
453511
except StopIteration:
454512
# TODO: return more info
455-
msg = '<h3>Successfully generated.</h3>'
513+
msg = '<h3>Successfully generated</h3>' if results_total > 0 else \
514+
'<h3>Successfully generated nothing - please check the settings and try again</h3>'
456515
break
457516
except Exception as e:
458-
traceback.print_exc()
459-
msg = '<h3>' + 'ERROR: ' + str(e) + '</h3>' + '\n'
460-
if 'out of GPU memory' not in msg:
461-
msg +=\
462-
'Please report this issue ' \
463-
f'<a href="https://github.com/thygate/{REPOSITORY_NAME}/issues">here</a>. ' \
464-
'Make sure to provide the full stacktrace: \n'
465-
msg += '<code style="white-space: pre;">' + traceback.format_exc() + '</code>'
517+
msg = format_exception(e)
466518
break
467519
if type == 'simple_mesh':
468520
mesh_simple_fi = result
469521
continue
470522
if type == 'inpainted_mesh':
471523
inpainted_mesh_fi = result
472524
continue
525+
if not isinstance(result, Image.Image):
526+
print(f'This is not supposed to happen! Somehow output type {type} is not supported! Input_i: {input_i}.')
527+
continue
528+
img_results += [(input_i, type, result)]
473529

474-
basename = 'depthmap'
475-
if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.get_opt('outdir_extras_samples', None):
476-
basename = Path(inputnames[input_i]).stem
477-
478-
show_images += [result]
479530
if inputs["save_outputs"]:
480531
try:
532+
basename = 'depthmap'
533+
if depthmap_mode == '2' and inputnames[input_i] is not None:
534+
if outpath != backbone.get_opt('outdir_extras_samples', None):
535+
basename = Path(inputnames[input_i]).stem
481536
suffix = "" if type == "depth" else f"{type}"
482537
backbone.save_image(result, path=outpath, basename=basename, seed=None,
483538
prompt=None, extension=backbone.get_opt('samples_format', 'png'), short_filename=True,
@@ -496,4 +551,4 @@ def run_generate(*inputs):
496551
if backbone.get_opt('depthmap_script_show_3d_inpaint', True):
497552
if inpainted_mesh_fi is not None and len(inpainted_mesh_fi) > 0:
498553
display_mesh_fi = inpainted_mesh_fi
499-
return show_images, inpainted_mesh_fi, display_mesh_fi, msg.replace('\n', '<br>')
554+
return map(lambda x: x[2], img_results), inpainted_mesh_fi, display_mesh_fi, msg.replace('\n', '<br>')

src/core.py

Lines changed: 60 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from pathlib import Path
2+
3+
import PIL.Image
24
from PIL import Image
35

46
try:
@@ -37,6 +39,14 @@
3739
model_holder = ModelHolder()
3840

3941

42+
def convert_to_i16(arr):
    """Quantize values from the unit interval into a single-channel 16 bit array.

    The input is scaled by 2**16 and clipped before being cast to uint16.
    This loses some precision.

    :param arr: array-like of numbers, expected in [0; 1]. Values outside of
        this range are clipped; NaN entries are mapped to 0.
    :return: numpy array of dtype uint16 with the same shape as the input.
    """
    numbytes = 2
    max_val = 2 ** (8 * numbytes)
    # asarray makes plain sequences behave like arrays (a list times an int would repeat the list).
    scaled = np.asarray(arr, dtype="float") * max_val
    # NaN passes through np.clip untouched and casting NaN to an integer type is undefined.
    scaled = np.where(np.isnan(scaled), 0.0, scaled)
    # uint16 conversion rounds down, therefore values must stay in [0; 2**16):
    # the -0.1 keeps the upper bound from overflowing.
    out = np.clip(scaled, 0, max_val - 0.1)
    return out.astype("uint16")
49+
4050
def convert_i16_to_rgb(image, like):
4151
# three channel, 8 bits per channel image
4252
output = np.zeros_like(like)
@@ -50,6 +60,10 @@ class CoreGenerationFunnelInp:
5060
"""This class takes a dictionary and creates a core_generation_funnel inp.
5161
Non-applicable parameters are silently discarded (no error)"""
5262
def __init__(self, values):
63+
if isinstance(values, CoreGenerationFunnelInp):
64+
values = values.values
65+
values = {(k.name if isinstance(k, GenerationOptions) else k).lower(): v for k, v in values.items()}
66+
5367
self.values = {}
5468
for setting in GenerationOptions:
5569
name = setting.name.lower()
@@ -74,7 +88,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
7488
inp = CoreGenerationFunnelInp(inp)
7589

7690
if ops is None:
77-
ops = {}
91+
ops = backbone.gather_ops()
7892
model_holder.update_settings(**ops)
7993

8094
# TODO: ideally, run_depthmap should not save meshes - that makes the function not pure
@@ -127,17 +141,37 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
127141
raw_prediction_invert = False
128142
"""True if near=dark on raw_prediction"""
129143
out = None
144+
130145
if inputdepthmaps is not None and inputdepthmaps[count] is not None:
131146
# use custom depthmap
132-
dimg = inputdepthmaps[count]
133-
# resize if not same size as input
134-
if dimg.width != inputimages[count].width or dimg.height != inputimages[count].height:
135-
dimg = dimg.resize((inputimages[count].width, inputimages[count].height), Image.Resampling.LANCZOS)
136-
137-
if dimg.mode == 'I' or dimg.mode == 'P' or dimg.mode == 'L':
138-
out = np.asarray(dimg, dtype="float")
147+
dp = inputdepthmaps[count]
148+
if isinstance(dp, Image.Image):
149+
if dp.width != inputimages[count].width or dp.height != inputimages[count].height:
150+
try: # LANCZOS may fail on some formats
151+
dp = dp.resize((inputimages[count].width, inputimages[count].height), Image.Resampling.LANCZOS)
152+
except:
153+
dp = dp.resize((inputimages[count].width, inputimages[count].height))
154+
# Trying desperately to rescale image to [0;1) without actually normalizing it
155+
# Normalizing is avoided, because we want to preserve the scale of the original depthmaps
156+
# (batch mode, video mode).
157+
if len(dp.getbands()) == 1:
158+
out = np.asarray(dp, dtype="float")
159+
out_max = out.max()
160+
if out_max < 256:
161+
bit_depth = 8
162+
elif out_max < 65536:
163+
bit_depth = 16
164+
else:
165+
bit_depth = 32
166+
out /= 2.0 ** bit_depth
167+
else:
168+
out = np.asarray(dp, dtype="float")[:, :, 0]
169+
out /= 256.0
139170
else:
140-
out = np.asarray(dimg, dtype="float")[:, :, 0]
171+
# Should be in interval [0; 1], values outside of this range will be clipped.
172+
out = np.asarray(dp, dtype="float")
173+
assert inputimages[count].height == out.shape[0], "Custom depthmap height mismatch"
174+
assert inputimages[count].width == out.shape[1], "Custom depthmap width mismatch"
141175
else:
142176
# override net size (size may be different for different images)
143177
if inp[go.NET_SIZE_MATCH]:
@@ -156,20 +190,20 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
156190
# TODO: some models may output negative values, maybe these should be clamped to zero.
157191
if raw_prediction_invert:
158192
out *= -1
193+
if inp[go.DO_OUTPUT_DEPTH_PREDICTION]:
194+
yield count, 'depth_prediction', np.copy(out)
159195
if inp[go.CLIPDEPTH]:
160196
out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1]
161197
out = np.clip(out, inp[go.CLIPDEPTH_FAR], inp[go.CLIPDEPTH_NEAR])
198+
out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1]
162199
else:
163200
# Regretfully, the depthmap is broken and will be replaced with a black image
164201
out = np.zeros(raw_prediction.shape)
165-
out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1]
166-
167-
# Single channel, 16 bit image. This loses some precision!
168-
# uint16 conversion uses round-down, therefore values should be [0; 2**16)
169-
numbytes = 2
170-
max_val = (2 ** (8 * numbytes))
171-
out = np.clip(out * max_val, 0, max_val - 0.1) # Clipping form above is needed to avoid overflowing
172-
img_output = out.astype("uint16")
202+
203+
# Maybe we should not use img_output for everything, since we get better accuracy from
204+
# the raw_prediction. However, it is not always supported. We maybe would like to achieve
205+
# reproducibility, so depthmap of the image should be the same as generating the depthmap one more time.
206+
img_output = convert_to_i16(out)
173207
"""Depthmap (near=bright), as uint16"""
174208

175209
# if 3dinpainting, store maps for processing in second pass
@@ -198,8 +232,8 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
198232

199233
# A weird quirk: if user tries to save depthmap, whereas custom depthmap is used,
200234
# depthmap will not be outputed, even if output_depth_combine is used.
201-
if inp[go.DO_OUTPUT_DEPTH] and inputdepthmaps[count] is None:
202-
if inp[go.DO_OUTPUT_DEPTH]:
235+
if inp[go.DO_OUTPUT_DEPTH]:
236+
if inputdepthmaps[count] is None:
203237
img_depth = cv2.bitwise_not(img_output) if inp[go.OUTPUT_DEPTH_INVERT] else img_output
204238
if inp[go.OUTPUT_DEPTH_COMBINE]:
205239
axis = 1 if inp[go.OUTPUT_DEPTH_COMBINE_AXIS] == 'Horizontal' else 0
@@ -209,6 +243,13 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
209243
yield count, 'concat_depth', img_concat
210244
else:
211245
yield count, 'depth', Image.fromarray(img_depth)
246+
else:
247+
# TODO: make it better
248+
# Yes, this seems stupid, but this is, logically, what should happen -
249+
# and this improves clarity of some other code.
250+
# But we won't return it if there is only one image.
251+
if len(inputimages) > 1:
252+
yield count, 'depth', Image.fromarray(img_output)
212253

213254
if inp[go.GEN_STEREO]:
214255
print("Generating stereoscopic images..")
@@ -319,7 +360,6 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
319360

320361

321362
def get_uniquefn(outpath, basename, ext):
322-
# Inefficient and may fail, maybe use unbounded binary search?
323363
basecount = backbone.get_next_sequence_number(outpath, basename)
324364
if basecount > 0: basecount = basecount - 1
325365
fullfn = None

0 commit comments

Comments
 (0)