1010import os
1111from loguru import logger
1212import shutil
13-
13+ import struct
1414
1515#They are used in the Audio class
1616RESULT = None
@@ -155,6 +155,88 @@ def customize_image(filePath,savePath,new_width=None,new_height=None):
155155 img .save (savePath )
156156
157157
def get_image_hash(image_path):
    """
    Gets the hash value of the image.

    The image is decoded and converted to RGB, and every pixel is reduced to
    the sum of its three channel values. Each pixel then contributes one
    character to the hash: "1" when it is brighter than the average over the
    whole image, "0" otherwise.

    Args:
        image_path: Image path

    Returns:
        The hash of the image (consisting of 0 and 1), one character per
        pixel. An image without any pixels yields an empty string.

    Raises:
        Propagates whatever ``PILImage.open`` / ``convert`` raise when the
        file cannot be opened or decoded (presumably ``OSError`` subclasses
        if ``PILImage`` is Pillow's ``Image`` module - confirm against the
        file's imports).
    """
    # Decode through the imaging library instead of slicing the raw file:
    # the bytes on disk are container headers plus (usually compressed)
    # data, not a plain RGB raster, and the header layout differs per format.
    # The context manager releases the file handle even if decoding fails.
    with PILImage.open(image_path) as img:
        raw = img.convert("RGB").tobytes()

    # After the RGB conversion the buffer holds three bytes per pixel.
    brightness = [r + g + b for r, g, b in zip(raw[0::3], raw[1::3], raw[2::3])]
    if not brightness:
        return ""

    # Compare value against total / count without leaving integer arithmetic,
    # so the result never depends on floating-point rounding.
    total = sum(brightness)
    count = len(brightness)
    return "".join("1" if value * count > total else "0" for value in brightness)
205+
def remove_same_images(directoryPath):
    """
    Delete the same images in the directory, and calculate the hash value of the images to identify whether the pictures are the same.
    Files are selected by extension (case-insensitive): jpg, png, bmp, webp, jpeg, gif, svg, tif, tiff.

    Entries are visited in sorted name order, so among files with equal
    hashes the one whose name sorts first is kept and the others are deleted.
    A file whose hash cannot be computed is logged and left untouched.
    NOTE(review): whether every listed extension (svg in particular) can
    actually be decoded depends on the imaging library behind
    ``get_image_hash`` - confirm.

    Args:
        directoryPath: The file directory path

    Returns:
        None
    """
    image_extensions = {
        ".jpg", ".png", ".bmp", ".webp", ".jpeg", ".gif", ".svg", ".tif", ".tiff",
    }

    # Maps each hash seen so far to the path of the file that is being kept.
    hashes = {}

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # choice of which duplicate survives reproducible.
    for filename in sorted(os.listdir(directoryPath)):
        extension = os.path.splitext(filename)[1].lower()
        if extension not in image_extensions:
            continue
        filepath = os.path.join(directoryPath, filename)
        # A directory may carry an image-like name; only regular files count.
        if not os.path.isfile(filepath):
            continue

        try:
            hash_value = get_image_hash(filepath)
        except (OSError, ValueError) as error:
            # One unreadable file must not abort the whole clean-up run.
            logger.warning(f"Skipping unreadable image: {filepath} ({error})")
            continue

        if hash_value in hashes:
            logger.info(f"Removing duplicate image: {filepath}")
            os.remove(filepath)
        else:
            hashes[hash_value] = filepath

    logger.info("Done removing duplicate images.")
238+
239+
158240def categorize_image (filePath ):
159241
160242 """
@@ -172,55 +254,55 @@ def categorize_image(filePath):
172254 if ext == ".png" :
173255 try :
174256 os .makedirs (f'{ filePath } /png' )
175- except FileExistsError as error :
257+ except FileExistsError :
176258 pass
177259 shutil .copy (os .path .join (filePath , filename ), f'{ filePath } /png' )
178260 elif ext == ".jpg" or ext == 'jpeg' :
179261 try :
180262 os .makedirs (f'{ filePath } /jpg' )
181- except FileExistsError as error :
263+ except FileExistsError :
182264 pass
183265 shutil .copy (os .path .join (filePath , filename ), f'{ filePath } /jpg' )
184266 elif ext == '.webp' :
185267 try :
186268 os .makedirs (f'{ filePath } /webp' )
187- except FileExistsError as error :
269+ except FileExistsError :
188270 pass
189271 shutil .copy (os .path .join (filePath , filename ), f'{ filePath } /webp' )
190272 elif ext == 'bmp' :
191273 try :
192274 os .makedirs (f'{ filePath } /bmp' )
193- except FileExistsError as error :
275+ except FileExistsError :
194276 pass
195277 shutil .copy (os .path .join (filePath ,filename ),f'{ filePath } /bmp' )
196278 elif ext == 'tif' or ext == 'tiff' :
197279 try :
198280 os .makedirs (f'{ filePath } /tif' )
199- except FileExistsError as error :
281+ except FileExistsError :
200282 pass
201283 shutil .copy (os .path .join (filePath ,filename ),f'{ filePath } /tif' )
202284 elif ext == 'gif' :
203285 try :
204286 os .makedirs (f'{ filePath } /gif' )
205- except FileExistsError as error :
287+ except FileExistsError :
206288 pass
207289 shutil .copy (os .path .join (filePath ,filename ),f'{ filePath } /gif' )
208290 elif ext == 'svg' :
209291 try :
210292 os .makedirs (f'{ filePath } /svg' )
211- except FileExistsError as error :
293+ except FileExistsError :
212294 pass
213295 shutil .copy (os .path .join (filePath ,filename ),f'{ filePath } /svg' )
214296 elif ext == 'wmf' :
215297 try :
216298 os .makedirs (f'{ filePath } /wmf' )
217- except FileExistsError as error :
299+ except FileExistsError :
218300 pass
219301 shutil .copy (os .path .join (filePath ,filename ),f'{ filePath } /wmf' )
220302 else :
221303 try :
222304 os .makedirs (f'{ filePath } /another' )
223- except FileExistsError as error :
305+ except FileExistsError :
224306 pass
225307 shutil .copy (os .path .join (filePath ,filename ),f'{ filePath } /another' )
226308
0 commit comments