akhaliq (HF Staff) committed
Commit c0afa65 · Parent: 66d2544

add wan animate

Files changed (2)
  1. app.py +382 -7
  2. requirements.txt +2 -1
app.py CHANGED
 
@@ -38,6 +38,8 @@ import atexit
 import asyncio
 from datetime import datetime, timedelta
 from typing import Optional
+import dashscope
+from dashscope.utils.oss_utils import check_and_upload_local

 # Gradio supported languages for syntax highlighting
 GRADIO_SUPPORTED_LANGUAGES = [
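
The new dashscope import brings in check_and_upload_local which, as used in WanAnimateApp.predict below, takes a model name, a local file path, and the API key, uploads the file to DashScope-managed storage when it is not already a remote URL, and returns a flag together with a resolvable URL.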
 
@@ -3679,6 +3681,226 @@ def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_
     except Exception as e:
         return f"Error generating music: {str(e)}"

+class WanAnimateApp:
+    """Wan2.2-Animate integration for character animation and video replacement using the DashScope API"""
+
+    def __init__(self):
+        self.api_key = os.getenv("DASHSCOPE_API_KEY")
+        if self.api_key:
+            dashscope.api_key = self.api_key
+        self.url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/image2video/video-synthesis/"
+        self.get_url = "https://dashscope.aliyuncs.com/api/v1/tasks/"
+
+    def predict(self, ref_img, video, model_id, model):
+        """
+        Generate an animated video using Wan2.2-Animate.
+
+        Args:
+            ref_img: Reference image file path
+            video: Template video file path
+            model_id: Animation mode ("wan2.2-animate-move" or "wan2.2-animate-mix")
+            model: Inference quality ("wan-pro" or "wan-std")
+
+        Returns:
+            Tuple of (video_url, status_message)
+        """
+        if not self.api_key:
+            return None, "Error: DASHSCOPE_API_KEY environment variable is not set"
+
+        try:
+            # Upload files to OSS if needed and get URLs
+            _, image_url = check_and_upload_local(model_id, ref_img, self.api_key)
+            _, video_url = check_and_upload_local(model_id, video, self.api_key)
+
+            # Prepare the request payload
+            payload = {
+                "model": model_id,
+                "input": {
+                    "image_url": image_url,
+                    "video_url": video_url
+                },
+                "parameters": {
+                    "check_image": True,
+                    "mode": model,
+                }
+            }
+
+            # Set up headers
+            headers = {
+                "X-DashScope-Async": "enable",
+                "X-DashScope-OssResourceResolve": "enable",
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json"
+            }
+
+            # Make the initial API request
+            response = requests.post(self.url, json=payload, headers=headers)
+
+            # Check whether the request was successful
+            if response.status_code != 200:
+                error_msg = f"Initial request failed with status code {response.status_code}: {response.text}"
+                print(f"[WanAnimate] {error_msg}")
+                return None, error_msg
+
+            # Get the task ID from the response
+            result = response.json()
+            task_id = result.get("output", {}).get("task_id")
+            if not task_id:
+                error_msg = "Failed to get task ID from response"
+                print(f"[WanAnimate] {error_msg}")
+                return None, error_msg
+
+            # Poll for results (self.get_url already ends with a slash)
+            get_url = f"{self.get_url}{task_id}"
+            headers = {
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json"
+            }
+
+            max_attempts = 60  # 5 minutes max wait time
+            attempt = 0
+
+            while attempt < max_attempts:
+                response = requests.get(get_url, headers=headers)
+                if response.status_code != 200:
+                    error_msg = f"Failed to get task status: {response.status_code}: {response.text}"
+                    print(f"[WanAnimate] {error_msg}")
+                    return None, error_msg
+
+                result = response.json()
+                print(f"[WanAnimate] Task status check {attempt + 1}: {result}")
+                task_status = result.get("output", {}).get("task_status")
+
+                if task_status == "SUCCEEDED":
+                    # Task completed successfully; return the video URL
+                    video_url = result["output"]["results"]["video_url"]
+                    print(f"[WanAnimate] Animation completed successfully: {video_url}")
+                    return video_url, "SUCCEEDED"
+                elif task_status == "FAILED":
+                    # Task failed; return the error message
+                    error_msg = result.get("output", {}).get("message", "Unknown error")
+                    code_msg = result.get("output", {}).get("code", "Unknown code")
+                    full_error = f"Task failed: {error_msg} Code: {code_msg} TaskId: {task_id}"
+                    print(f"[WanAnimate] {full_error}")
+                    return None, full_error
+                else:
+                    # Task is still running; wait and retry
+                    time.sleep(5)  # Wait 5 seconds before polling again
+                    attempt += 1
+
+            # Timeout reached
+            timeout_msg = f"Animation generation timed out after {max_attempts * 5} seconds. TaskId: {task_id}"
+            print(f"[WanAnimate] {timeout_msg}")
+            return None, timeout_msg
+
+        except Exception as e:
+            error_msg = f"Exception during animation generation: {str(e)}"
+            print(f"[WanAnimate] {error_msg}")
+            return None, error_msg
+
+def generate_animation_from_image_video(input_image_data, input_video_data, prompt: str, model_id: str = "wan2.2-animate-move", model: str = "wan-pro", session_id: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
+    """Generate an animated video from a reference image and a template video using Wan2.2-Animate.
+
+    Returns an HTML <video> tag whose source points to a temporary file URL.
+    """
+    try:
+        print(f"[ImageVideo2Animation] Starting animation generation with model={model_id}, quality={model}")
+
+        if not os.getenv("DASHSCOPE_API_KEY"):
+            print("[ImageVideo2Animation] Missing DASHSCOPE_API_KEY")
+            return "Error: DASHSCOPE_API_KEY environment variable is not set. Please configure your DashScope API key."
+
+        # Normalize inputs to file paths
+        def _save_to_temp_file(data, suffix):
+            if isinstance(data, str) and os.path.exists(data):
+                return data
+            elif hasattr(data, 'name') and os.path.exists(data.name):
+                return data.name
+            else:
+                # Save to a temporary file
+                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+                if hasattr(data, 'read'):
+                    temp_file.write(data.read())
+                elif isinstance(data, (bytes, bytearray)):
+                    temp_file.write(data)
+                elif isinstance(data, np.ndarray):
+                    # Handle a numpy array (likely image data)
+                    if suffix.lower() in ['.jpg', '.jpeg', '.png']:
+                        # Convert the numpy array to an image
+                        from PIL import Image
+                        if data.dtype != np.uint8:
+                            data = (data * 255).astype(np.uint8)
+                        if len(data.shape) == 3 and data.shape[2] == 3:
+                            # RGB image
+                            img = Image.fromarray(data, 'RGB')
+                        elif len(data.shape) == 3 and data.shape[2] == 4:
+                            # RGBA image
+                            img = Image.fromarray(data, 'RGBA')
+                        elif len(data.shape) == 2:
+                            # Grayscale image
+                            img = Image.fromarray(data, 'L')
+                        else:
+                            raise ValueError(f"Unsupported numpy array shape for image: {data.shape}")
+                        img.save(temp_file.name, format='JPEG' if suffix.lower() in ['.jpg', '.jpeg'] else 'PNG')
+                    else:
+                        raise ValueError(f"Cannot save numpy array as {suffix} format")
+                else:
+                    raise ValueError(f"Unsupported data type: {type(data)}")
+                temp_file.close()
+                return temp_file.name
+
+        ref_img_path = _save_to_temp_file(input_image_data, '.jpg')
+        video_path = _save_to_temp_file(input_video_data, '.mp4')
+
+        print(f"[ImageVideo2Animation] Input files prepared: image={ref_img_path}, video={video_path}")
+
+        # Initialize WanAnimateApp and generate the animation
+        wan_app = WanAnimateApp()
+        video_url, status = wan_app.predict(ref_img_path, video_path, model_id, model)
+
+        if video_url and status == "SUCCEEDED":
+            print(f"[ImageVideo2Animation] Animation generated successfully: {video_url}")
+
+            # Download the video and create a temporary URL
+            try:
+                response = requests.get(video_url, timeout=60)
+                response.raise_for_status()
+                video_bytes = response.content
+
+                filename = "wan_animate_result.mp4"
+                temp_url = upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
+
+                if temp_url.startswith("Error"):
+                    print(f"[ImageVideo2Animation] Failed to upload video: {temp_url}")
+                    return temp_url
+
+                # Create the video HTML tag
+                video_html = (
+                    f'<video controls autoplay muted loop playsinline '
+                    f'style="max-width:100%; height:auto; border-radius:8px; box-shadow:0 4px 8px rgba(0,0,0,0.1)" '
+                    f'onerror="this.style.display=\'none\'; console.error(\'Animation video failed to load\')">'
+                    f'<source src="{temp_url}" type="video/mp4" />'
+                    f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
+                    f'</video>'
+                )
+
+                print(f"[ImageVideo2Animation] Successfully created animation HTML with temporary URL: {temp_url}")
+                return video_html
+
+            except Exception as e:
+                error_msg = f"Failed to download generated animation: {str(e)}"
+                print(f"[ImageVideo2Animation] {error_msg}")
+                return f"Error: {error_msg}"
+        else:
+            error_msg = f"Animation generation failed: {status}"
+            print(f"[ImageVideo2Animation] {error_msg}")
+            return f"Error: {error_msg}"
+
+    except Exception as e:
+        print("[ImageVideo2Animation] Exception during generation:")
+        print(f"Animation generation error: {str(e)}")
+        return f"Error generating animation: {str(e)}"
+
 def extract_image_prompts_from_text(text: str, num_images_needed: int = 1) -> list:
     """Extract image generation prompts from the full text based on number of images needed"""
     # Use the entire text as the base prompt for image generation
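
For reference, a minimal usage sketch of the new class (hypothetical file paths; assumes DASHSCOPE_API_KEY is set in the environment):

    app = WanAnimateApp()
    video_url, status = app.predict(
        ref_img="character.jpg",         # reference image of the character (placeholder path)
        video="template_dance.mp4",      # template video supplying the motion (placeholder path)
        model_id="wan2.2-animate-move",  # or "wan2.2-animate-mix" to swap the character into the video
        model="wan-pro",                 # or "wan-std" for faster, lower-frame-rate output
    )
    if status == "SUCCEEDED":
        print(video_url)                 # URL of the rendered MP4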
 
@@ -4331,7 +4553,7 @@ def create_video_replacement_blocks_from_input_video(html_content: str, user_pro
     print("[Video2Video] No <body> tag; appending video via replacement block")
     return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"

-def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_video_to_video: bool = False, video_to_video_prompt: Optional[str] = None, input_video_data = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
+def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_video_to_video: bool = False, video_to_video_prompt: Optional[str] = None, input_video_data = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None, enable_image_video_to_animation: bool = False, animation_mode: str = "wan2.2-animate-move", animation_quality: str = "wan-pro", animation_video_data = None, token: gr.OAuthToken | None = None) -> str:
     """Apply text/image/video/music replacements to HTML content.

     - Works with single-document HTML strings
 
@@ -4361,8 +4583,57 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
     try:
         print(
             f"[MediaApply] enable_i2v={enable_image_to_video}, enable_i2i={enable_image_to_image}, "
-            f"enable_t2i={enable_text_to_image}, enable_t2v={enable_text_to_video}, enable_v2v={enable_video_to_video}, enable_t2m={enable_text_to_music}, has_image={input_image_data is not None}, has_video={input_video_data is not None}"
+            f"enable_t2i={enable_text_to_image}, enable_t2v={enable_text_to_video}, enable_v2v={enable_video_to_video}, enable_t2m={enable_text_to_music}, enable_iv2a={enable_image_video_to_animation}, has_image={input_image_data is not None}, has_video={input_video_data is not None}, has_anim_video={animation_video_data is not None}"
         )
+
+        # If image+video-to-animation is enabled, generate the animated video and return.
+        if enable_image_video_to_animation and input_image_data is not None and animation_video_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
+            print(f"[MediaApply] Running image+video-to-animation with mode={animation_mode}, quality={animation_quality}")
+            animation_html_tag = ""
+            try:
+                animation_html_tag = generate_animation_from_image_video(
+                    input_image_data,
+                    animation_video_data,
+                    user_prompt or "",
+                    model_id=animation_mode,
+                    model=animation_quality,
+                    session_id=session_id,
+                    token=token
+                )
+                if not (animation_html_tag or "").startswith("Error"):
+                    # Validate the animation video HTML before attempting placement
+                    if validate_video_html(animation_html_tag):
+                        blocks_anim = llm_place_media(result, animation_html_tag, media_kind="video")
+                    else:
+                        print("[MediaApply] Generated animation HTML failed validation, skipping LLM placement")
+                        blocks_anim = ""
+                else:
+                    print(f"[MediaApply] Animation generation failed: {animation_html_tag}")
+                    blocks_anim = ""
+            except Exception as e:
+                print(f"[MediaApply] Exception during animation generation: {str(e)}")
+                blocks_anim = ""
+
+            # If LLM placement failed but a valid video tag exists, fall back to a fixed placement
+            if not blocks_anim and animation_html_tag and not animation_html_tag.startswith("Error"):
+                # Create a simple replacement block for the animation video
+                blocks_anim = f"""{SEARCH_START}
+</head>
+
+{DIVIDER}
+</head>
+<div class="animation-container" style="margin: 20px 0; text-align: center;">
+{animation_html_tag}
+</div>
+{REPLACE_END}"""
+
+            if blocks_anim:
+                print("[MediaApply] Applying animation replacement blocks")
+                result = apply_search_replace_changes(result, blocks_anim)
+                if is_multipage and entry_html_path:
+                    multipage_files[entry_html_path] = result
+                    return format_multipage_output(multipage_files)
+                return result
+
         # If image-to-video is enabled, replace the first image with a generated video and return.
         if enable_image_to_video and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
             i2v_prompt = (image_to_video_prompt or user_prompt or "").strip()
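
Note that when llm_place_media returns nothing, the fallback block above re-emits the closing </head> tag followed by the animation container, so apply_search_replace_changes injects the generated <video> element immediately after </head>; the exact markup of the search/replace markers comes from the pre-existing SEARCH_START, DIVIDER, and REPLACE_END constants, which are not shown in this diff.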
 
@@ -5516,7 +5787,7 @@ The HTML code above contains the complete original website structure with all im
 stop_generation = False


-def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_video_to_video: bool = False, video_to_video_prompt: Optional[str] = None, input_video_data = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None):
+def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_video_to_video: bool = False, video_to_video_prompt: Optional[str] = None, input_video_data = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None, enable_image_video_to_animation: bool = False, animation_mode: str = "wan2.2-animate-move", animation_quality: str = "wan-pro", animation_video_data = None):
     if query is None:
         query = ''
     if _history is None:
 
@@ -5793,6 +6064,11 @@ This will help me create a better design for you."""
                 input_video_data=input_video_data,
                 enable_text_to_music=enable_text_to_music,
                 text_to_music_prompt=text_to_music_prompt,
+                enable_image_video_to_animation=enable_image_video_to_animation,
+                animation_mode=animation_mode,
+                animation_quality=animation_quality,
+                animation_video_data=animation_video_data,
+                token=None,
             )

             _history.append([query, final_content])

@@ -5867,6 +6143,10 @@ This will help me create a better design for you."""
                 input_video_data=input_video_data,
                 enable_text_to_music=enable_text_to_music,
                 text_to_music_prompt=text_to_music_prompt,
+                enable_image_video_to_animation=enable_image_video_to_animation,
+                animation_mode=animation_mode,
+                animation_quality=animation_quality,
+                animation_video_data=animation_video_data,
                 token=None,
             )

@@ -5899,6 +6179,10 @@ This will help me create a better design for you."""
                 input_video_data=input_video_data,
                 enable_text_to_music=enable_text_to_music,
                 text_to_music_prompt=text_to_music_prompt,
+                enable_image_video_to_animation=enable_image_video_to_animation,
+                animation_mode=animation_mode,
+                animation_quality=animation_quality,
+                animation_video_data=animation_video_data,
                 token=None,
             )
         else:

@@ -6340,6 +6624,10 @@ This will help me create a better design for you."""
                 input_video_data=input_video_data,
                 enable_text_to_music=enable_text_to_music,
                 text_to_music_prompt=text_to_music_prompt,
+                enable_image_video_to_animation=enable_image_video_to_animation,
+                animation_mode=animation_mode,
+                animation_quality=animation_quality,
+                animation_video_data=animation_video_data,
                 token=None,
             )

@@ -6375,6 +6663,11 @@ This will help me create a better design for you."""
                 input_video_data=input_video_data,
                 enable_text_to_music=enable_text_to_music,
                 text_to_music_prompt=text_to_music_prompt,
+                enable_image_video_to_animation=enable_image_video_to_animation,
+                animation_mode=animation_mode,
+                animation_quality=animation_quality,
+                animation_video_data=animation_video_data,
+                token=None,
             )

             _history.append([query, final_content])
 
@@ -7674,6 +7967,38 @@ with gr.Blocks(
             visible=False
         )

+        # Image+Video to Animation
+        image_video_to_animation_toggle = gr.Checkbox(
+            label="🎭 Character Animation (uses input image + video)",
+            value=False,
+            visible=True,
+            info="Animate characters using Wan2.2-Animate with a reference image and a template video"
+        )
+        animation_mode_dropdown = gr.Dropdown(
+            label="Animation Mode",
+            choices=[
+                ("Move Mode (animate character with video motion)", "wan2.2-animate-move"),
+                ("Mix Mode (replace character in video)", "wan2.2-animate-mix")
+            ],
+            value="wan2.2-animate-move",
+            visible=False,
+            info="Move: animate the image character with the video's motion. Mix: replace the video's character with the image character"
+        )
+        animation_quality_dropdown = gr.Dropdown(
+            label="Animation Quality",
+            choices=[
+                ("Professional (25fps, 720p)", "wan-pro"),
+                ("Standard (15fps, 720p)", "wan-std")
+            ],
+            value="wan-pro",
+            visible=False,
+            info="Higher quality takes more time to generate"
+        )
+        animation_video_input = gr.Video(
+            label="Template video for animation (upload a video to use as the motion template or character-replacement source)",
+            visible=False
+        )
+
         # LLM-guided media placement is now always on (no toggle in UI)

         def on_image_to_image_toggle(toggled, beta_enabled):
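
Since the dropdown choices are (label, value) tuples, Gradio shows the human-readable label while the handler receives the value string, e.g. selecting "Mix Mode (replace character in video)" passes "wan2.2-animate-mix" through to generation_code.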
 
@@ -7719,6 +8044,21 @@ with gr.Blocks(
             inputs=[text_to_music_toggle, beta_toggle],
             outputs=[text_to_music_prompt]
         )
+
+        def on_image_video_to_animation_toggle(toggled, beta_enabled):
+            vis = bool(toggled) and not bool(beta_enabled)
+            return (
+                gr.update(visible=vis),  # generation_image_input
+                gr.update(visible=vis),  # animation_mode_dropdown
+                gr.update(visible=vis),  # animation_quality_dropdown
+                gr.update(visible=vis),  # animation_video_input
+            )
+
+        image_video_to_animation_toggle.change(
+            on_image_video_to_animation_toggle,
+            inputs=[image_video_to_animation_toggle, beta_toggle],
+            outputs=[generation_image_input, animation_mode_dropdown, animation_quality_dropdown, animation_video_input]
+        )
         model_dropdown = gr.Dropdown(
             choices=[model['name'] for model in AVAILABLE_MODELS],
             value=DEFAULT_MODEL_NAME,
 
@@ -8271,7 +8611,7 @@ with gr.Blocks(
         show_progress="hidden",
     ).then(
         generation_code,
-        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, video_to_video_toggle, video_to_video_prompt, video_input, text_to_music_toggle, text_to_music_prompt],
+        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, video_to_video_toggle, video_to_video_prompt, video_input, text_to_music_toggle, text_to_music_prompt, image_video_to_animation_toggle, animation_mode_dropdown, animation_quality_dropdown, animation_video_input],
         outputs=[code_output, history, sandbox, history_output]
     ).then(
         end_generation_ui,
 
@@ -8368,6 +8708,10 @@ with gr.Blocks(
         upd_current_model = gr.skip()
         upd_t2m_toggle = gr.skip()
         upd_t2m_prompt = gr.skip()
+        upd_iv2a_toggle = gr.skip()
+        upd_anim_mode = gr.skip()
+        upd_anim_quality = gr.skip()
+        upd_anim_video = gr.skip()

         # Split by comma to separate main prompt and directives
         segments = [seg.strip() for seg in (text or "").split(",") if seg.strip()]
 
@@ -8447,6 +8791,20 @@ with gr.Blocks(
         if p:
             upd_t2m_prompt = gr.update(value=p)

+        # Image+Video-to-Animation
+        if ("animate" in seg_norm) or ("character animation" in seg_norm) or ("wan animate" in seg_norm):
+            upd_iv2a_toggle = gr.update(value=True)
+            # Check for a mode specification
+            if "move mode" in seg_norm:
+                upd_anim_mode = gr.update(value="wan2.2-animate-move")
+            elif "mix mode" in seg_norm:
+                upd_anim_mode = gr.update(value="wan2.2-animate-mix")
+            # Check for a quality specification
+            if "standard quality" in seg_norm or "std quality" in seg_norm:
+                upd_anim_quality = gr.update(value="wan-std")
+            elif "professional quality" in seg_norm or "pro quality" in seg_norm:
+                upd_anim_quality = gr.update(value="wan-pro")
+
         # URL (website redesign)
         url = _extract_url(seg)
         if url:
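
With this parsing in place, a chat directive such as "animate, mix mode, standard quality" (an illustrative message, not part of the commit) would enable the animation toggle and select wan2.2-animate-mix at wan-std quality.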
 
@@ -8520,6 +8878,10 @@ with gr.Blocks(
         upd_current_model,
         upd_t2m_toggle,
         upd_t2m_prompt,
+        upd_iv2a_toggle,
+        upd_anim_mode,
+        upd_anim_quality,
+        upd_anim_video,
     )

     # Wire chat submit -> apply settings -> run generation
 
@@ -8550,6 +8912,10 @@ with gr.Blocks(
             current_model,
             text_to_music_toggle,
             text_to_music_prompt,
+            image_video_to_animation_toggle,
+            animation_mode_dropdown,
+            animation_quality_dropdown,
+            animation_video_input,
         ],
         queue=False,
     ).then(
 
@@ -8559,7 +8925,7 @@ with gr.Blocks(
         show_progress="hidden",
     ).then(
         generation_code,
-        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, video_to_video_toggle, video_to_video_prompt, video_input, text_to_music_toggle, text_to_music_prompt],
+        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, video_to_video_toggle, video_to_video_prompt, video_input, text_to_music_toggle, text_to_music_prompt, image_video_to_animation_toggle, animation_mode_dropdown, animation_quality_dropdown, animation_video_input],
         outputs=[code_output, history, sandbox, history_output]
     ).then(
         end_generation_ui,
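
Both wiring sites append the four new components at the end of the inputs list, matching the order of the new trailing parameters of generation_code, since Gradio binds inputs to handler arguments positionally.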
 
@@ -8591,7 +8957,7 @@ with gr.Blocks(
     )

     # Toggle between classic controls and beta chat UI
-    def toggle_beta(checked: bool, t2i: bool, i2i: bool, i2v: bool, t2v: bool, v2v: bool, t2m: bool):
+    def toggle_beta(checked: bool, t2i: bool, i2i: bool, i2v: bool, t2v: bool, v2v: bool, t2m: bool, iv2a: bool):
         # Prompts only visible in classic mode and when their toggles are on
         t2i_vis = (not checked) and bool(t2i)
         i2i_vis = (not checked) and bool(i2i)
 
@@ -8599,6 +8965,7 @@ with gr.Blocks(
         t2v_vis = (not checked) and bool(t2v)
         v2v_vis = (not checked) and bool(v2v)
         t2m_vis = (not checked) and bool(t2m)
+        iv2a_vis = (not checked) and bool(iv2a)

         return (
             # Chat UI group
 
@@ -8627,6 +8994,10 @@ with gr.Blocks(
             gr.update(visible=v2v_vis),      # video_input
             gr.update(visible=not checked),  # text_to_music_toggle
             gr.update(visible=t2m_vis),      # text_to_music_prompt
+            gr.update(visible=not checked),  # image_video_to_animation_toggle
+            gr.update(visible=iv2a_vis),     # animation_mode_dropdown
+            gr.update(visible=iv2a_vis),     # animation_quality_dropdown
+            gr.update(visible=iv2a_vis),     # animation_video_input
             gr.update(visible=not checked),  # model_dropdown
             gr.update(visible=not checked),  # quick_start_md
             gr.update(visible=not checked),  # quick_examples_col
 
@@ -8634,7 +9005,7 @@ with gr.Blocks(

     beta_toggle.change(
         toggle_beta,
-        inputs=[beta_toggle, image_generation_toggle, image_to_image_toggle, image_to_video_toggle, text_to_video_toggle, video_to_video_toggle, text_to_music_toggle],
+        inputs=[beta_toggle, image_generation_toggle, image_to_image_toggle, image_to_video_toggle, text_to_video_toggle, video_to_video_toggle, text_to_music_toggle, image_video_to_animation_toggle],
         outputs=[
             sidebar_chatbot,
             sidebar_msg,
 
@@ -8660,6 +9031,10 @@ with gr.Blocks(
             video_input,
             text_to_music_toggle,
             text_to_music_prompt,
+            image_video_to_animation_toggle,
+            animation_mode_dropdown,
+            animation_quality_dropdown,
+            animation_video_input,
             model_dropdown,
             quick_start_md,
             quick_examples_col,
requirements.txt CHANGED
@@ -10,4 +10,5 @@ requests
 beautifulsoup4
 html2text
 openai
-mistralai
+mistralai
+dashscope
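
dashscope is the official DashScope Python SDK (installable with pip install dashscope); at runtime the integration also expects the DASHSCOPE_API_KEY environment variable, which WanAnimateApp checks before issuing any requests.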