Skip to content

Commit f442b40

Browse files
authored
Create file_scanner.py
1 parent de8a30c commit f442b40

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed

file_scanner.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import os
2+
import sys
3+
import argparse
4+
import hashlib
5+
import time
6+
from datetime import datetime
7+
8+
def log_message(log_file_path, message):
    """Append one timestamped entry to a log file.

    The entry is written as ``<YYYY_MM_DD---HH:MM> - <message>`` followed by
    a newline, using the current local time.  The file is opened in append
    mode as UTF-8 for each call, so it is created if it does not exist.

    Args:
        log_file_path: Path of the log file to append to.
        message: Text to record after the timestamp.
    """
    stamp = datetime.now().strftime('%Y_%m_%d---%H:%M')
    entry = f'{stamp} - {message}\n'
    with open(log_file_path, 'a', encoding='utf-8') as handle:
        handle.write(entry)
12+
13+
def process_file(file_path):
    """Collect metadata for one file and derive a fingerprint from it.

    The fingerprint is the SHA-256 hex digest of the absolute path, the size
    and the truncated modification time joined by tab characters.  The file's
    contents are never read.

    Args:
        file_path: Path to the file; it is resolved with ``os.path.abspath``.

    Returns:
        A tuple ``(complete_path, file_size, modified_date, file_hash,
        file_name_ext, creation_date)``.  Both dates are integer seconds
        since the epoch (the fractional part is truncated).

    Raises:
        OSError: If the file cannot be stat'ed (missing, no permission, ...).
    """
    complete_path = os.path.abspath(file_path)

    # A single stat call yields a consistent snapshot of size and timestamps
    # (three separate getsize/getmtime/getctime calls could straddle a
    # modification) and costs one system call instead of three.
    info = os.stat(complete_path)
    file_size = info.st_size
    modified_date = int(info.st_mtime)
    # NOTE: st_ctime is the creation time on Windows but the time of the last
    # metadata change on Unix, so "creation_date" is only approximate there.
    creation_date = int(info.st_ctime)
    file_name_ext = os.path.basename(complete_path)

    hash_string = f"{complete_path}\t{file_size}\t{modified_date}"
    # Paths whose names are not valid in the filesystem encoding arrive with
    # surrogate escapes; a strict encode would raise UnicodeEncodeError.
    # surrogateescape maps them back to the original bytes and produces the
    # same digest as before for every path that used to work.
    hash_bytes = hash_string.encode('utf-8', errors='surrogateescape')
    file_hash = hashlib.sha256(hash_bytes).hexdigest()

    return (complete_path, file_size, modified_date, file_hash,
            file_name_ext, creation_date)
25+
26+
def scan_folders(input_file_path, output_file_path, sleep_value=None,
                 files_sleep=None):
    """Walk every folder listed in a text file and record each file found.

    Each non-blank line of *input_file_path* is treated as a folder and
    walked recursively with ``os.walk``.  For every file, the tuple returned
    by ``process_file`` is written to *output_file_path* as one tab-separated
    line, and the output is flushed after each line.  Progress and errors are
    appended to ``file_scanner.log`` in the directory of the running script
    (derived from ``sys.argv[0]``).

    Problems are logged rather than raised: an unreadable input file or an
    unwritable output file ends the scan early, while a directory that cannot
    be listed or a file that cannot be processed is skipped.

    Args:
        input_file_path: UTF-8 text file with one folder path per line.
        output_file_path: File that is overwritten with the results (UTF-8).
        sleep_value: Pause length in milliseconds.  Throttling is active only
            when both this and *files_sleep* are positive.
        files_sleep: Pause after every this-many successfully recorded files.
    """
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    log_file_path = os.path.join(script_dir, 'file_scanner.log')

    def loggable(value):
        # Paths with undecodable names carry surrogate escapes that the UTF-8
        # log cannot encode; show them as backslash escapes so that logging
        # an error never raises a second error.  Ordinary text is unchanged.
        return str(value).encode('utf-8', 'backslashreplace').decode('utf-8')

    log_message(log_file_path, 'start')
    log_message(log_file_path, 'received command line')

    try:
        with open(input_file_path, 'r', encoding='utf-8') as input_file:
            folders = [line.strip() for line in input_file if line.strip()]
    except Exception as e:
        log_message(log_file_path, f'error reading input file: {str(e)}')
        return

    try:
        output_file = open(output_file_path, 'w', encoding='utf-8')
    except Exception as e:
        log_message(log_file_path, f'error opening output file: {str(e)}')
        return

    try:
        with output_file:
            files_processed = 0
            # Non-positive values disable throttling instead of making
            # time.sleep() raise after a file has already been recorded.
            sleep_seconds = (sleep_value / 1000.0
                             if sleep_value and sleep_value > 0 else None)
            sleep_every = (files_sleep
                           if files_sleep and files_sleep > 0 else None)

            for folder in folders:
                def report_walk_error(error, folder=folder):
                    # os.walk silently ignores directories it cannot list
                    # unless an onerror callback is given; without this a
                    # missing or unreadable folder would leave no trace.
                    log_message(
                        log_file_path,
                        f'error opening the folder {loggable(folder)}: '
                        f'{loggable(error)}')

                try:
                    for root, _, files in os.walk(folder,
                                                  onerror=report_walk_error):
                        for file_name in files:
                            file_path = os.path.join(root, file_name)
                            try:
                                file_data = process_file(file_path)
                                output_line = '\t'.join(
                                    str(x) for x in file_data)
                                output_file.write(output_line + '\n')
                                output_file.flush()
                            except Exception as e:
                                log_message(
                                    log_file_path,
                                    f'error processing file '
                                    f'{loggable(file_path)}: {loggable(e)}')
                                continue

                            # Only successfully recorded files are counted.
                            files_processed += 1
                            if files_processed % 1000 == 0:
                                log_message(
                                    log_file_path,
                                    f'scanned {files_processed} files')

                            if (sleep_seconds and sleep_every and
                                    files_processed % sleep_every == 0):
                                time.sleep(sleep_seconds)

                except Exception as e:
                    log_message(
                        log_file_path,
                        f'error opening the folder {loggable(folder)}: '
                        f'{loggable(e)}')
                    continue
    finally:
        log_message(log_file_path, 'finish')
84+
85+
def main():
    """Parse the command line and run the folder scan.

    Positional arguments are the input file (list of folders) followed by the
    output file; ``--sleep_value`` and ``--files_sleep`` are optional integers
    that are forwarded unchanged to ``scan_folders``.
    """
    cli = argparse.ArgumentParser(description='File Scanner')
    cli.add_argument(
        'input_file',
        help='Path to input file with folders list',
    )
    cli.add_argument(
        '--sleep_value',
        type=int,
        help='Sleep time in milliseconds',
    )
    cli.add_argument(
        '--files_sleep',
        type=int,
        help='Number of files after which to sleep',
    )
    cli.add_argument(
        'output_file',
        help='Path to output file',
    )

    options = cli.parse_args()

    scan_folders(
        options.input_file,
        options.output_file,
        options.sleep_value,
        options.files_sleep,
    )
98+
99+
# Run the command-line interface only when executed as a script, not when
# this module is imported.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)