add wan animate
- app.py +382 -7
- requirements.txt +2 -1
app.py
CHANGED
@@ -38,6 +38,8 @@ import atexit
 import asyncio
 from datetime import datetime, timedelta
 from typing import Optional
+import dashscope
+from dashscope.utils.oss_utils import check_and_upload_local
 
 # Gradio supported languages for syntax highlighting
 GRADIO_SUPPORTED_LANGUAGES = [
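The two new imports carry the OSS upload step used below; a minimal sketch of the intended wiring, assuming only that DASHSCOPE_API_KEY is set in the environment (the file path is a placeholder):

    import os
    import dashscope
    from dashscope.utils.oss_utils import check_and_upload_local

    dashscope.api_key = os.getenv("DASHSCOPE_API_KEY")  # same source WanAnimateApp.__init__ reads
    # Per the calls in WanAnimateApp.predict below, check_and_upload_local returns a 2-tuple
    # whose second element is the usable URL (local files are uploaded to OSS first).
    _, image_url = check_and_upload_local("wan2.2-animate-move", "ref.jpg", dashscope.api_key)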
@@ -3679,6 +3681,226 @@ def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_
     except Exception as e:
         return f"Error generating music: {str(e)}"
 
+class WanAnimateApp:
+    """Wan2.2-Animate integration for character animation and video replacement using DashScope API"""
+
+    def __init__(self):
+        self.api_key = os.getenv("DASHSCOPE_API_KEY")
+        if self.api_key:
+            dashscope.api_key = self.api_key
+        self.url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/image2video/video-synthesis/"
+        self.get_url = "https://dashscope.aliyuncs.com/api/v1/tasks/"
+
+    def predict(self, ref_img, video, model_id, model):
+        """
+        Generate animated video using Wan2.2-Animate
+
+        Args:
+            ref_img: Reference image file path
+            video: Template video file path
+            model_id: Animation mode ("wan2.2-animate-move" or "wan2.2-animate-mix")
+            model: Inference quality ("wan-pro" or "wan-std")
+
+        Returns:
+            Tuple of (video_url, status_message)
+        """
+        if not self.api_key:
+            return None, "Error: DASHSCOPE_API_KEY environment variable is not set"
+
+        try:
+            # Upload files to OSS if needed and get URLs
+            _, image_url = check_and_upload_local(model_id, ref_img, self.api_key)
+            _, video_url = check_and_upload_local(model_id, video, self.api_key)
+
+            # Prepare the request payload
+            payload = {
+                "model": model_id,
+                "input": {
+                    "image_url": image_url,
+                    "video_url": video_url
+                },
+                "parameters": {
+                    "check_image": True,
+                    "mode": model,
+                }
+            }
+
+            # Set up headers
+            headers = {
+                "X-DashScope-Async": "enable",
+                "X-DashScope-OssResourceResolve": "enable",
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json"
+            }
+
+            # Make the initial API request
+            response = requests.post(self.url, json=payload, headers=headers)
+
+            # Check if request was successful
+            if response.status_code != 200:
+                error_msg = f"Initial request failed with status code {response.status_code}: {response.text}"
+                print(f"[WanAnimate] {error_msg}")
+                return None, error_msg
+
+            # Get the task ID from response
+            result = response.json()
+            task_id = result.get("output", {}).get("task_id")
+            if not task_id:
+                error_msg = "Failed to get task ID from response"
+                print(f"[WanAnimate] {error_msg}")
+                return None, error_msg
+
+            # Poll for results (self.get_url already ends with "/")
+            get_url = f"{self.get_url}{task_id}"
+            headers = {
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json"
+            }
+
+            max_attempts = 60  # 5 minutes max wait time
+            attempt = 0
+
+            while attempt < max_attempts:
+                response = requests.get(get_url, headers=headers)
+                if response.status_code != 200:
+                    error_msg = f"Failed to get task status: {response.status_code}: {response.text}"
+                    print(f"[WanAnimate] {error_msg}")
+                    return None, error_msg
+
+                result = response.json()
+                print(f"[WanAnimate] Task status check {attempt + 1}: {result}")
+                task_status = result.get("output", {}).get("task_status")
+
+                if task_status == "SUCCEEDED":
+                    # Task completed successfully, return video URL
+                    video_url = result["output"]["results"]["video_url"]
+                    print(f"[WanAnimate] Animation completed successfully: {video_url}")
+                    return video_url, "SUCCEEDED"
+                elif task_status == "FAILED":
+                    # Task failed, return error message
+                    error_msg = result.get("output", {}).get("message", "Unknown error")
+                    code_msg = result.get("output", {}).get("code", "Unknown code")
+                    full_error = f"Task failed: {error_msg} Code: {code_msg} TaskId: {task_id}"
+                    print(f"[WanAnimate] {full_error}")
+                    return None, full_error
+                else:
+                    # Task is still running, wait and retry
+                    time.sleep(5)  # Wait 5 seconds before polling again
+                    attempt += 1
+
+            # Timeout reached
+            timeout_msg = f"Animation generation timed out after {max_attempts * 5} seconds. TaskId: {task_id}"
+            print(f"[WanAnimate] {timeout_msg}")
+            return None, timeout_msg
+
+        except Exception as e:
+            error_msg = f"Exception during animation generation: {str(e)}"
+            print(f"[WanAnimate] {error_msg}")
+            return None, error_msg
+
+def generate_animation_from_image_video(input_image_data, input_video_data, prompt: str, model_id: str = "wan2.2-animate-move", model: str = "wan-pro", session_id: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
+    """Generate animated video from reference image and template video using Wan2.2-Animate.
+
+    Returns an HTML <video> tag whose source points to a temporary file URL.
+    """
+    try:
+        print(f"[ImageVideo2Animation] Starting animation generation with model={model_id}, quality={model}")
+
+        if not os.getenv("DASHSCOPE_API_KEY"):
+            print("[ImageVideo2Animation] Missing DASHSCOPE_API_KEY")
+            return "Error: DASHSCOPE_API_KEY environment variable is not set. Please configure your DashScope API key."
+
+        # Normalize inputs to file paths
+        def _save_to_temp_file(data, suffix):
+            if isinstance(data, str) and os.path.exists(data):
+                return data
+            elif hasattr(data, 'name') and os.path.exists(data.name):
+                return data.name
+            else:
+                # Save to temporary file
+                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+                if hasattr(data, 'read'):
+                    temp_file.write(data.read())
+                elif isinstance(data, (bytes, bytearray)):
+                    temp_file.write(data)
+                elif isinstance(data, np.ndarray):
+                    # Handle numpy array (likely image data)
+                    if suffix.lower() in ['.jpg', '.jpeg', '.png']:
+                        # Convert numpy array to image
+                        from PIL import Image
+                        if data.dtype != np.uint8:
+                            data = (data * 255).astype(np.uint8)
+                        if len(data.shape) == 3 and data.shape[2] == 3:
+                            # RGB image
+                            img = Image.fromarray(data, 'RGB')
+                        elif len(data.shape) == 3 and data.shape[2] == 4:
+                            # RGBA image
+                            img = Image.fromarray(data, 'RGBA')
+                        elif len(data.shape) == 2:
+                            # Grayscale image
+                            img = Image.fromarray(data, 'L')
+                        else:
+                            raise ValueError(f"Unsupported numpy array shape for image: {data.shape}")
+                        img.save(temp_file.name, format='JPEG' if suffix.lower() in ['.jpg', '.jpeg'] else 'PNG')
+                    else:
+                        raise ValueError(f"Cannot save numpy array as {suffix} format")
+                else:
+                    raise ValueError(f"Unsupported data type: {type(data)}")
+                temp_file.close()
+                return temp_file.name
+
+        ref_img_path = _save_to_temp_file(input_image_data, '.jpg')
+        video_path = _save_to_temp_file(input_video_data, '.mp4')
+
+        print(f"[ImageVideo2Animation] Input files prepared: image={ref_img_path}, video={video_path}")
+
+        # Initialize WanAnimateApp and generate animation
+        wan_app = WanAnimateApp()
+        video_url, status = wan_app.predict(ref_img_path, video_path, model_id, model)
+
+        if video_url and status == "SUCCEEDED":
+            print(f"[ImageVideo2Animation] Animation generated successfully: {video_url}")
+
+            # Download the video and create temporary URL
+            try:
+                response = requests.get(video_url, timeout=60)
+                response.raise_for_status()
+                video_bytes = response.content
+
+                filename = "wan_animate_result.mp4"
+                temp_url = upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
+
+                if temp_url.startswith("Error"):
+                    print(f"[ImageVideo2Animation] Failed to upload video: {temp_url}")
+                    return temp_url
+
+                # Create video HTML tag
+                video_html = (
+                    f'<video controls autoplay muted loop playsinline '
+                    f'style="max-width:100%; height:auto; border-radius:8px; box-shadow:0 4px 8px rgba(0,0,0,0.1)" '
+                    f'onerror="this.style.display=\'none\'; console.error(\'Animation video failed to load\')">'
+                    f'<source src="{temp_url}" type="video/mp4" />'
+                    f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
+                    f'</video>'
+                )
+
+                print(f"[ImageVideo2Animation] Successfully created animation HTML with temporary URL: {temp_url}")
+                return video_html
+
+            except Exception as e:
+                error_msg = f"Failed to download generated animation: {str(e)}"
+                print(f"[ImageVideo2Animation] {error_msg}")
+                return f"Error: {error_msg}"
+        else:
+            error_msg = f"Animation generation failed: {status}"
+            print(f"[ImageVideo2Animation] {error_msg}")
+            return f"Error: {error_msg}"
+
+    except Exception as e:
+        print("[ImageVideo2Animation] Exception during generation:")
+        print(f"Animation generation error: {str(e)}")
+        return f"Error generating animation: {str(e)}"
+
 def extract_image_prompts_from_text(text: str, num_images_needed: int = 1) -> list:
     """Extract image generation prompts from the full text based on number of images needed"""
     # Use the entire text as the base prompt for image generation
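For orientation, a minimal standalone exercise of the new class (a sketch; the file paths are placeholders and require DASHSCOPE_API_KEY to be set):

    # Hypothetical smoke test for WanAnimateApp.
    app = WanAnimateApp()
    video_url, status = app.predict(
        ref_img="character.jpg",         # reference character image (placeholder)
        video="dance.mp4",               # motion/template video (placeholder)
        model_id="wan2.2-animate-move",  # or "wan2.2-animate-mix"
        model="wan-pro",                 # or "wan-std"
    )
    print(video_url if status == "SUCCEEDED" else f"failed: {status}")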
@@ -4331,7 +4553,7 @@ def create_video_replacement_blocks_from_input_video(html_content: str, user_pro
     print("[Video2Video] No <body> tag; appending video via replacement block")
     return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
 
-def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_video_to_video: bool = False, video_to_video_prompt: Optional[str] = None, input_video_data = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
+def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_video_to_video: bool = False, video_to_video_prompt: Optional[str] = None, input_video_data = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None, enable_image_video_to_animation: bool = False, animation_mode: str = "wan2.2-animate-move", animation_quality: str = "wan-pro", animation_video_data = None, token: gr.OAuthToken | None = None) -> str:
     """Apply text/image/video/music replacements to HTML content.
 
     - Works with single-document HTML strings
@@ -4361,8 +4583,57 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
     try:
         print(
             f"[MediaApply] enable_i2v={enable_image_to_video}, enable_i2i={enable_image_to_image}, "
-            f"enable_t2i={enable_text_to_image}, enable_t2v={enable_text_to_video}, enable_v2v={enable_video_to_video}, enable_t2m={enable_text_to_music}, has_image={input_image_data is not None}, has_video={input_video_data is not None}"
+            f"enable_t2i={enable_text_to_image}, enable_t2v={enable_text_to_video}, enable_v2v={enable_video_to_video}, enable_t2m={enable_text_to_music}, enable_iv2a={enable_image_video_to_animation}, has_image={input_image_data is not None}, has_video={input_video_data is not None}, has_anim_video={animation_video_data is not None}"
         )
+
+        # If image+video-to-animation is enabled, generate animated video and return.
+        if enable_image_video_to_animation and input_image_data is not None and animation_video_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
+            print(f"[MediaApply] Running image+video-to-animation with mode={animation_mode}, quality={animation_quality}")
+            try:
+                animation_html_tag = generate_animation_from_image_video(
+                    input_image_data,
+                    animation_video_data,
+                    user_prompt or "",
+                    model_id=animation_mode,
+                    model=animation_quality,
+                    session_id=session_id,
+                    token=token
+                )
+                if not (animation_html_tag or "").startswith("Error"):
+                    # Validate animation video HTML before attempting placement
+                    if validate_video_html(animation_html_tag):
+                        blocks_anim = llm_place_media(result, animation_html_tag, media_kind="video")
+                    else:
+                        print("[MediaApply] Generated animation HTML failed validation, skipping LLM placement")
+                        blocks_anim = ""
+                else:
+                    print(f"[MediaApply] Animation generation failed: {animation_html_tag}")
+                    blocks_anim = ""
+            except Exception as e:
+                print(f"[MediaApply] Exception during animation generation: {str(e)}")
+                blocks_anim = ""
+
+            # If LLM placement failed, use fallback placement
+            if not blocks_anim:
+                # Create simple replacement block for animation video
+                blocks_anim = f"""{SEARCH_START}
+</head>
+
+{DIVIDER}
+</head>
+<div class="animation-container" style="margin: 20px 0; text-align: center;">
+{animation_html_tag}
+</div>
+{REPLACE_END}"""
+
+            if blocks_anim:
+                print("[MediaApply] Applying animation replacement blocks")
+                result = apply_search_replace_changes(result, blocks_anim)
+                if is_multipage and entry_html_path:
+                    multipage_files[entry_html_path] = result
+                    return format_multipage_output(multipage_files)
+                return result
+
         # If image-to-video is enabled, replace the first image with a generated video and return.
         if enable_image_to_video and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
             i2v_prompt = (image_to_video_prompt or user_prompt or "").strip()
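The fallback branch reuses the app's existing search/replace block format; assuming the conventional marker values (SEARCH_START = "<<<<<<< SEARCH", DIVIDER = "=======", REPLACE_END = ">>>>>>> REPLACE"), the generated blocks_anim string would render as:

    <<<<<<< SEARCH
    </head>

    =======
    </head>
    <div class="animation-container" style="margin: 20px 0; text-align: center;">
    <video controls autoplay muted loop playsinline ...>...</video>
    </div>
    >>>>>>> REPLACE

so apply_search_replace_changes re-anchors on the closing </head> and injects the wrapped <video> tag immediately after it.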
@@ -5516,7 +5787,7 @@ The HTML code above contains the complete original website structure with all im
 stop_generation = False
 
 
-def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_video_to_video: bool = False, video_to_video_prompt: Optional[str] = None, input_video_data = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None):
+def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_video_to_video: bool = False, video_to_video_prompt: Optional[str] = None, input_video_data = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None, enable_image_video_to_animation: bool = False, animation_mode: str = "wan2.2-animate-move", animation_quality: str = "wan-pro", animation_video_data = None):
     if query is None:
         query = ''
     if _history is None:
@@ -5793,6 +6064,11 @@ This will help me create a better design for you."""
             input_video_data=input_video_data,
             enable_text_to_music=enable_text_to_music,
             text_to_music_prompt=text_to_music_prompt,
+            enable_image_video_to_animation=enable_image_video_to_animation,
+            animation_mode=animation_mode,
+            animation_quality=animation_quality,
+            animation_video_data=animation_video_data,
+            token=None,
         )
 
         _history.append([query, final_content])
@@ -5867,6 +6143,10 @@ This will help me create a better design for you."""
             input_video_data=input_video_data,
             enable_text_to_music=enable_text_to_music,
             text_to_music_prompt=text_to_music_prompt,
+            enable_image_video_to_animation=enable_image_video_to_animation,
+            animation_mode=animation_mode,
+            animation_quality=animation_quality,
+            animation_video_data=animation_video_data,
             token=None,
         )
 
@@ -5899,6 +6179,10 @@ This will help me create a better design for you."""
             input_video_data=input_video_data,
             enable_text_to_music=enable_text_to_music,
             text_to_music_prompt=text_to_music_prompt,
+            enable_image_video_to_animation=enable_image_video_to_animation,
+            animation_mode=animation_mode,
+            animation_quality=animation_quality,
+            animation_video_data=animation_video_data,
             token=None,
         )
     else:
@@ -6340,6 +6624,10 @@ This will help me create a better design for you."""
             input_video_data=input_video_data,
             enable_text_to_music=enable_text_to_music,
             text_to_music_prompt=text_to_music_prompt,
+            enable_image_video_to_animation=enable_image_video_to_animation,
+            animation_mode=animation_mode,
+            animation_quality=animation_quality,
+            animation_video_data=animation_video_data,
             token=None,
         )
 
@@ -6375,6 +6663,11 @@ This will help me create a better design for you."""
             input_video_data=input_video_data,
             enable_text_to_music=enable_text_to_music,
             text_to_music_prompt=text_to_music_prompt,
+            enable_image_video_to_animation=enable_image_video_to_animation,
+            animation_mode=animation_mode,
+            animation_quality=animation_quality,
+            animation_video_data=animation_video_data,
+            token=None,
         )
 
         _history.append([query, final_content])
@@ -7674,6 +7967,38 @@ with gr.Blocks(
                 visible=False
             )
 
+            # Image+Video to Animation
+            image_video_to_animation_toggle = gr.Checkbox(
+                label="🎭 Character Animation (uses input image + video)",
+                value=False,
+                visible=True,
+                info="Animate characters using Wan2.2-Animate with reference image and template video"
+            )
+            animation_mode_dropdown = gr.Dropdown(
+                label="Animation Mode",
+                choices=[
+                    ("Move Mode (animate character with video motion)", "wan2.2-animate-move"),
+                    ("Mix Mode (replace character in video)", "wan2.2-animate-mix")
+                ],
+                value="wan2.2-animate-move",
+                visible=False,
+                info="Move: animate image character with video motion. Mix: replace video character with image character"
+            )
+            animation_quality_dropdown = gr.Dropdown(
+                label="Animation Quality",
+                choices=[
+                    ("Professional (25fps, 720p)", "wan-pro"),
+                    ("Standard (15fps, 720p)", "wan-std")
+                ],
+                value="wan-pro",
+                visible=False,
+                info="Higher quality takes more time to generate"
+            )
+            animation_video_input = gr.Video(
+                label="Template video for animation (upload a video to use as motion template or character replacement source)",
+                visible=False
+            )
+
             # LLM-guided media placement is now always on (no toggle in UI)
 
             def on_image_to_image_toggle(toggled, beta_enabled):
@@ -7719,6 +8044,21 @@ with gr.Blocks(
                 inputs=[text_to_music_toggle, beta_toggle],
                 outputs=[text_to_music_prompt]
             )
+
+            def on_image_video_to_animation_toggle(toggled, beta_enabled):
+                vis = bool(toggled) and not bool(beta_enabled)
+                return (
+                    gr.update(visible=vis),  # generation_image_input
+                    gr.update(visible=vis),  # animation_mode_dropdown
+                    gr.update(visible=vis),  # animation_quality_dropdown
+                    gr.update(visible=vis),  # animation_video_input
+                )
+
+            image_video_to_animation_toggle.change(
+                on_image_video_to_animation_toggle,
+                inputs=[image_video_to_animation_toggle, beta_toggle],
+                outputs=[generation_image_input, animation_mode_dropdown, animation_quality_dropdown, animation_video_input]
+            )
             model_dropdown = gr.Dropdown(
                 choices=[model['name'] for model in AVAILABLE_MODELS],
                 value=DEFAULT_MODEL_NAME,
@@ -8271,7 +8611,7 @@ with gr.Blocks(
         show_progress="hidden",
     ).then(
         generation_code,
-        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, video_to_video_toggle, video_to_video_prompt, video_input, text_to_music_toggle, text_to_music_prompt],
+        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, video_to_video_toggle, video_to_video_prompt, video_input, text_to_music_toggle, text_to_music_prompt, image_video_to_animation_toggle, animation_mode_dropdown, animation_quality_dropdown, animation_video_input],
         outputs=[code_output, history, sandbox, history_output]
     ).then(
         end_generation_ui,
@@ -8368,6 +8708,10 @@ with gr.Blocks(
         upd_current_model = gr.skip()
         upd_t2m_toggle = gr.skip()
         upd_t2m_prompt = gr.skip()
+        upd_iv2a_toggle = gr.skip()
+        upd_anim_mode = gr.skip()
+        upd_anim_quality = gr.skip()
+        upd_anim_video = gr.skip()
 
         # Split by comma to separate main prompt and directives
         segments = [seg.strip() for seg in (text or "").split(",") if seg.strip()]
@@ -8447,6 +8791,20 @@ with gr.Blocks(
             if p:
                 upd_t2m_prompt = gr.update(value=p)
 
+            # Image+Video-to-Animation
+            if ("animate" in seg_norm) or ("character animation" in seg_norm) or ("wan animate" in seg_norm):
+                upd_iv2a_toggle = gr.update(value=True)
+                # Check for mode specification
+                if "move mode" in seg_norm:
+                    upd_anim_mode = gr.update(value="wan2.2-animate-move")
+                elif "mix mode" in seg_norm:
+                    upd_anim_mode = gr.update(value="wan2.2-animate-mix")
+                # Check for quality specification
+                if "standard quality" in seg_norm or "std quality" in seg_norm:
+                    upd_anim_quality = gr.update(value="wan-std")
+                elif "professional quality" in seg_norm or "pro quality" in seg_norm:
+                    upd_anim_quality = gr.update(value="wan-pro")
+
             # URL (website redesign)
             url = _extract_url(seg)
             if url:
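Concretely, a comma-segmented beta-chat message can drive the new controls via the parsing above; for example (message invented for illustration):

    text = "landing page for a dance studio, animate, mix mode, standard quality"
    # After the comma split: "animate" turns upd_iv2a_toggle on,
    # "mix mode" selects "wan2.2-animate-mix", and "standard quality" selects "wan-std".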
@@ -8520,6 +8878,10 @@ with gr.Blocks(
             upd_current_model,
             upd_t2m_toggle,
             upd_t2m_prompt,
+            upd_iv2a_toggle,
+            upd_anim_mode,
+            upd_anim_quality,
+            upd_anim_video,
         )
 
     # Wire chat submit -> apply settings -> run generation
@@ -8550,6 +8912,10 @@ with gr.Blocks(
             current_model,
             text_to_music_toggle,
             text_to_music_prompt,
+            image_video_to_animation_toggle,
+            animation_mode_dropdown,
+            animation_quality_dropdown,
+            animation_video_input,
         ],
         queue=False,
     ).then(
@@ -8559,7 +8925,7 @@ with gr.Blocks(
         show_progress="hidden",
     ).then(
         generation_code,
-        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, video_to_video_toggle, video_to_video_prompt, video_input, text_to_music_toggle, text_to_music_prompt],
+        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, video_to_video_toggle, video_to_video_prompt, video_input, text_to_music_toggle, text_to_music_prompt, image_video_to_animation_toggle, animation_mode_dropdown, animation_quality_dropdown, animation_video_input],
         outputs=[code_output, history, sandbox, history_output]
     ).then(
         end_generation_ui,
@@ -8591,7 +8957,7 @@ with gr.Blocks(
     )
 
     # Toggle between classic controls and beta chat UI
-    def toggle_beta(checked: bool, t2i: bool, i2i: bool, i2v: bool, t2v: bool, v2v: bool, t2m: bool):
+    def toggle_beta(checked: bool, t2i: bool, i2i: bool, i2v: bool, t2v: bool, v2v: bool, t2m: bool, iv2a: bool):
         # Prompts only visible in classic mode and when their toggles are on
         t2i_vis = (not checked) and bool(t2i)
         i2i_vis = (not checked) and bool(i2i)
@@ -8599,6 +8965,7 @@ with gr.Blocks(
         t2v_vis = (not checked) and bool(t2v)
         v2v_vis = (not checked) and bool(v2v)
         t2m_vis = (not checked) and bool(t2m)
+        iv2a_vis = (not checked) and bool(iv2a)
 
         return (
             # Chat UI group
@@ -8627,6 +8994,10 @@ with gr.Blocks(
             gr.update(visible=v2v_vis),  # video_input
             gr.update(visible=not checked),  # text_to_music_toggle
             gr.update(visible=t2m_vis),  # text_to_music_prompt
+            gr.update(visible=not checked),  # image_video_to_animation_toggle
+            gr.update(visible=iv2a_vis),  # animation_mode_dropdown
+            gr.update(visible=iv2a_vis),  # animation_quality_dropdown
+            gr.update(visible=iv2a_vis),  # animation_video_input
             gr.update(visible=not checked),  # model_dropdown
             gr.update(visible=not checked),  # quick_start_md
             gr.update(visible=not checked),  # quick_examples_col
@@ -8634,7 +9005,7 @@ with gr.Blocks(
 
     beta_toggle.change(
         toggle_beta,
-        inputs=[beta_toggle, image_generation_toggle, image_to_image_toggle, image_to_video_toggle, text_to_video_toggle, video_to_video_toggle, text_to_music_toggle],
+        inputs=[beta_toggle, image_generation_toggle, image_to_image_toggle, image_to_video_toggle, text_to_video_toggle, video_to_video_toggle, text_to_music_toggle, image_video_to_animation_toggle],
        outputs=[
             sidebar_chatbot,
             sidebar_msg,
@@ -8660,6 +9031,10 @@ with gr.Blocks(
             video_input,
             text_to_music_toggle,
             text_to_music_prompt,
+            image_video_to_animation_toggle,
+            animation_mode_dropdown,
+            animation_quality_dropdown,
+            animation_video_input,
             model_dropdown,
             quick_start_md,
             quick_examples_col,
requirements.txt
CHANGED
@@ -10,4 +10,5 @@ requests
 beautifulsoup4
 html2text
 openai
-mistralai
+mistralai
+dashscope
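The added dashscope package is available on PyPI; at runtime the animation path also requires the DASHSCOPE_API_KEY environment variable (checked in WanAnimateApp.__init__ above), e.g. set as a Space secret.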