44from typing import Any , Dict , List , Union
55from urllib .parse import urlparse
66import re
7+ import mimetypes
78
89import pydantic
910import requests
@@ -79,23 +80,23 @@ def is_url(string: str) -> bool:
7980
8081def encode_image (image : Union [str , bytes , "PILImage.Image" , dict ], download_images : bool = False ) -> str :
8182 """
82- Encode an image to a base64 data URI.
83+ Encode an image or file to a base64 data URI.
8384
8485 Args:
85- image: The image to encode. Can be a PIL Image, file path, URL, or data URI.
86+ image: The image or file to encode. Can be a PIL Image, file path, URL, or data URI.
8687 download_images: Whether to download images from URLs.
8788
8889 Returns:
89- str: The data URI of the image or the URL if download_images is False.
90+ str: The data URI of the file or the URL if download_images is False.
9091
9192 Raises:
92- ValueError: If the image type is not supported.
93+ ValueError: If the file type is not supported.
9394 """
9495 if isinstance (image , dict ) and "url" in image :
9596 # NOTE: Not doing other validation for now
9697 return image ["url" ]
9798 elif isinstance (image , str ):
98- if image .startswith ("data:image/ " ):
99+ if image .startswith ("data:" ):
99100 # Already a data URI
100101 return image
101102 elif os .path .isfile (image ):
@@ -110,8 +111,8 @@ def encode_image(image: Union[str, bytes, "PILImage.Image", dict], download_imag
110111 return image
111112 else :
112113 # Unsupported string format
113- print (f"Unsupported image string: { image } " )
114- raise ValueError (f"Unsupported image string: { image } " )
114+ print (f"Unsupported file string: { image } " )
115+ raise ValueError (f"Unsupported file string: { image } " )
115116 elif PIL_AVAILABLE and isinstance (image , PILImage .Image ):
116117 # PIL Image
117118 return _encode_pil_image (image )
@@ -129,34 +130,52 @@ def encode_image(image: Union[str, bytes, "PILImage.Image", dict], download_imag
129130
130131
131132def _encode_image_from_file (file_path : str ) -> str :
132- """Encode an image from a file path to a base64 data URI."""
133- with open (file_path , "rb" ) as image_file :
134- image_data = image_file .read ()
135- file_extension = _get_file_extension (file_path )
136- encoded_image = base64 .b64encode (image_data ).decode ("utf-8" )
137- return f"data:image/{ file_extension } ;base64,{ encoded_image } "
133+ """Encode a file from a file path to a base64 data URI."""
134+ with open (file_path , "rb" ) as file :
135+ file_data = file .read ()
136+
137+ # Use mimetypes to guess directly from the file path
138+ mime_type , _ = mimetypes .guess_type (file_path )
139+ if mime_type is None :
140+ raise ValueError (f"Could not determine MIME type for file: { file_path } " )
141+
142+ encoded_data = base64 .b64encode (file_data ).decode ("utf-8" )
143+ return f"data:{ mime_type } ;base64,{ encoded_data } "
138144
139145
def _encode_image_from_url(image_url: str) -> str:
    """Download a file from a URL and encode it as a base64 data URI.

    The MIME type is taken from the response's ``Content-Type`` header when
    present; otherwise it is guessed from the URL.

    Args:
        image_url: URL of the file to download and encode.

    Returns:
        str: A data URI of the form ``data:<mime type>;base64,<payload>``.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``response.raise_for_status()``).
        ValueError: If there is no usable ``Content-Type`` header and the
            MIME type cannot be guessed from the URL.
    """
    response = requests.get(image_url)
    response.raise_for_status()

    # The header may carry parameters (e.g. "image/png; charset=binary").
    # Only the media type itself belongs directly after "data:", so drop
    # anything following the first ";" and normalise case/whitespace.
    content_type = response.headers.get("Content-Type", "")
    mime_type = content_type.split(";", 1)[0].strip().lower()

    if not mime_type:
        # No usable header: fall back to guessing from the URL.
        mime_type, _ = mimetypes.guess_type(image_url)
        if mime_type is None:
            raise ValueError(f"Could not determine MIME type for URL: {image_url}")

    encoded_data = base64.b64encode(response.content).decode("utf-8")
    return f"data:{mime_type};base64,{encoded_data}"
163+
152164
153165def _encode_pil_image (image : 'PILImage' ) -> str :
154166 """Encode a PIL Image object to a base64 data URI."""
155167 buffered = io .BytesIO ()
156- file_extension = (image .format or "PNG" ).lower ()
157- image .save (buffered , format = file_extension )
158- encoded_image = base64 .b64encode (buffered .getvalue ()).decode ("utf-8" )
159- return f"data:image/{ file_extension } ;base64,{ encoded_image } "
168+ file_format = image .format or "PNG"
169+ image .save (buffered , format = file_format )
170+
171+ # Get the correct MIME type using the image format
172+ file_extension = file_format .lower ()
173+ mime_type , _ = mimetypes .guess_type (f"file.{ file_extension } " )
174+ if mime_type is None :
175+ raise ValueError (f"Could not determine MIME type for image format: { file_format } " )
176+
177+ encoded_data = base64 .b64encode (buffered .getvalue ()).decode ("utf-8" )
178+ return f"data:{ mime_type } ;base64,{ encoded_data } "
160179
161180
162181def _get_file_extension (path_or_url : str ) -> str :
@@ -166,11 +185,11 @@ def _get_file_extension(path_or_url: str) -> str:
166185
167186
168187def is_image (obj ) -> bool :
169- """Check if the object is an image or a valid image reference."""
188+ """Check if the object is an image or a valid media file reference."""
170189 if PIL_AVAILABLE and isinstance (obj , PILImage .Image ):
171190 return True
172191 if isinstance (obj , str ):
173- if obj .startswith ("data:image/ " ):
192+ if obj .startswith ("data:" ):
174193 return True
175194 elif os .path .isfile (obj ):
176195 return True
0 commit comments