|
| 1 | +import os |
| 2 | +import sys |
| 3 | +import argparse |
| 4 | +import hashlib |
| 5 | +import time |
| 6 | +from datetime import datetime |
| 7 | + |
def log_message(log_file_path, message):
    """Append one timestamped line to the log file.

    The line has the form ``<YYYY_MM_DD---HH:MM> - <message>`` and is
    written as UTF-8. The file is opened in append mode, so it is created
    when it does not exist yet.

    Args:
        log_file_path: Path of the log file to append to.
        message: Text to record. Characters UTF-8 cannot encode (for
            example the lone surrogates Python uses for undecodable file
            names) are written as backslash escapes instead of raising.

    Raises:
        OSError: If the log file cannot be opened or written.
    """
    timestamp = datetime.now().strftime('%Y_%m_%d---%H:%M')
    # backslashreplace keeps logging from raising UnicodeEncodeError on
    # unencodable text. This function is called from inside exception
    # handlers, where a second failure would replace the error being
    # reported.
    with open(log_file_path, 'a', encoding='utf-8',
              errors='backslashreplace') as log_file:
        log_file.write(f'{timestamp} - {message}\n')
| 12 | + |
def process_file(file_path):
    """Collect identifying metadata for a single file.

    Only file-system metadata is used; the file's contents are not read.

    Args:
        file_path: Path to the file. It is made absolute before use.

    Returns:
        A 6-tuple ``(complete_path, file_size, modified_date, file_hash,
        file_name_ext, creation_date)``. Both dates are whole seconds since
        the epoch. ``file_hash`` is the SHA-256 hex digest of the absolute
        path, size and modification time joined by tab characters.

    Raises:
        OSError: If the file cannot be stat'ed.
    """
    complete_path = os.path.abspath(file_path)

    # One stat call instead of getsize/getmtime/getctime (three calls):
    # cheaper, and size and times come from the same snapshot.
    info = os.stat(complete_path)
    file_size = info.st_size
    modified_date = int(info.st_mtime)
    # st_ctime is the value os.path.getctime returns. Per the os.stat
    # documentation it is the metadata-change time on Unix, not a true
    # creation time.
    creation_date = int(info.st_ctime)
    file_name_ext = os.path.basename(complete_path)

    hash_string = f"{complete_path}\t{file_size}\t{modified_date}"
    # surrogatepass yields the same bytes as a plain UTF-8 encode for
    # ordinary names, but does not raise on the lone surrogates that stand
    # in for undecodable bytes in file names.
    file_hash = hashlib.sha256(
        hash_string.encode('utf-8', 'surrogatepass')).hexdigest()

    return (complete_path, file_size, modified_date, file_hash,
            file_name_ext, creation_date)
| 25 | + |
def scan_folders(input_file_path, output_file_path, sleep_value=None,
                 files_sleep=None):
    """Walk every folder listed in the input file and record each file found.

    For every file, the fields returned by ``process_file`` are written to
    the output file as one tab-separated line. Progress and errors are
    appended to ``file_scanner.log`` in the directory of the running script
    (derived from ``sys.argv[0]``).

    Args:
        input_file_path: UTF-8 text file with one folder path per line;
            blank lines are ignored.
        output_file_path: File that is overwritten with the scan results.
        sleep_value: Pause length in milliseconds. None, zero or a negative
            value disables pausing.
        files_sleep: Pause after every this-many processed files. Pausing
            only happens when both this and ``sleep_value`` are set.

    Returns:
        None. Problems with the input file, the output file, a folder or a
        single file are logged and the scan continues where it can.
    """
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    log_file_path = os.path.join(script_dir, 'file_scanner.log')

    def log(message):
        # Undecodable file names carry lone surrogates, which a UTF-8 log
        # cannot encode. Escape them first so that reporting an error can
        # never raise a new one and abort the folder being walked.
        log_message(
            log_file_path,
            message.encode('utf-8', 'backslashreplace').decode('utf-8'))

    def log_walk_error(error):
        # os.walk ignores directory-listing errors unless given an onerror
        # callback; without it, missing or unreadable folders are skipped
        # without any trace in the log.
        log(f'error opening the folder {error.filename}: {str(error)}')

    log('start')
    log('received command line')

    try:
        with open(input_file_path, 'r', encoding='utf-8') as input_file:
            folders = [line.strip() for line in input_file if line.strip()]
    except Exception as e:
        log(f'error reading input file: {str(e)}')
        return

    # A non-positive pause would make time.sleep raise on every throttled
    # file, so it is treated as "no pause".
    if sleep_value and sleep_value > 0:
        sleep_seconds = sleep_value / 1000.0
    else:
        sleep_seconds = None

    try:
        # backslashreplace lets lines for undecodable file names be written
        # (escaped) instead of being dropped with an encoding error.
        output_file = open(output_file_path, 'w', encoding='utf-8',
                           errors='backslashreplace')
    except Exception as e:
        log(f'error opening output file: {str(e)}')
        return

    files_processed = 0

    try:
        # The with-block closes the output on every exit path, before the
        # final 'finish' entry is logged.
        with output_file:
            for folder in folders:
                try:
                    walker = os.walk(folder, onerror=log_walk_error)
                    for root, _, files in walker:
                        for file_name in files:
                            file_path = os.path.join(root, file_name)
                            try:
                                file_data = process_file(file_path)
                                output_line = '\t'.join(
                                    str(x) for x in file_data)
                                output_file.write(output_line + '\n')
                                # NOTE(review): per-line flush kept from the
                                # original; presumably so partial results
                                # survive an interrupted scan.
                                output_file.flush()
                            except Exception as e:
                                log(f'error processing file {file_path}: '
                                    f'{str(e)}')
                                continue

                            # Bookkeeping sits outside the per-file try so
                            # a logging or sleep failure is not reported as
                            # a failure of the file itself.
                            files_processed += 1
                            if files_processed % 1000 == 0:
                                log(f'scanned {files_processed} files')

                            if (sleep_seconds and files_sleep and
                                    files_processed % files_sleep == 0):
                                time.sleep(sleep_seconds)

                except Exception as e:
                    log(f'error opening the folder {folder}: {str(e)}')
                    continue
    finally:
        log('finish')
| 84 | + |
def main():
    """Parse the command line and start the folder scan.

    The two positional arguments are the input file (list of folders) and
    the output file. ``--sleep_value`` and ``--files_sleep`` are optional
    integers that are handed to ``scan_folders`` unchanged.
    """
    cli = argparse.ArgumentParser(description='File Scanner')

    # (name, add_argument keyword settings), registered in this order.
    argument_table = (
        ('input_file', {'help': 'Path to input file with folders list'}),
        ('--sleep_value', {'type': int,
                           'help': 'Sleep time in milliseconds'}),
        ('--files_sleep', {'type': int,
                           'help': 'Number of files after which to sleep'}),
        ('output_file', {'help': 'Path to output file'}),
    )
    for name, settings in argument_table:
        cli.add_argument(name, **settings)

    options = cli.parse_args()

    scan_folders(
        options.input_file,
        options.output_file,
        options.sleep_value,
        options.files_sleep,
    )


# Run the scanner only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
0 commit comments