diff options
author | Raghuram Subramani <raghus2247@gmail.com> | 2025-03-27 23:50:36 +0530 |
---|---|---|
committer | Raghuram Subramani <raghus2247@gmail.com> | 2025-03-27 23:50:36 +0530 |
commit | 97d1df0cd10f9f4adc1991cc8067cc8f1d3978cf (patch) | |
tree | e2bd866c6536f6a91d46c6171d4b7ab11f26f57d /scrape_ecourtindia_v6/create_named_pdfs.py | |
parent | 6aca68ddf318a6ae3852966107281c5f8642bb66 (diff) |
add create named pdfs
Diffstat (limited to 'scrape_ecourtindia_v6/create_named_pdfs.py')
-rw-r--r-- | scrape_ecourtindia_v6/create_named_pdfs.py | 25 |
1 files changed, 25 insertions, 0 deletions
"""Copy downloaded order PDFs into ``named_pdf/`` under descriptive names.

Every record in the TinyDB database ``orders.json`` is expected to carry the
fields ``date``, ``case_info``, ``court_name`` and ``filename``.  The file
referenced by ``filename`` is copied to
``named_pdf/<date>---<case_info>---<court_name>.pdf`` with each component
sanitised for use in a file name.
"""

import re
import shutil
from pathlib import Path

DB_PATH = 'orders.json'
OUTPUT_DIR = Path('named_pdf')
NAME_FIELDS = ('date', 'case_info', 'court_name')

# Compiled once: the same two substitutions run three times per record.
_UNSAFE_CHARS = re.compile(r'[<>:"/\\|?*()]')
_UNDERSCORE_RUN = re.compile(r'_+')


def sanitize_filename(filename):
    """Return *filename* with characters unsafe in file names replaced.

    Each of ``< > : " / \\ | ? * ( )`` becomes ``_``, runs of underscores
    are collapsed to a single one, and leading/trailing underscores and
    spaces are stripped.

    Non-string values (for example a numeric date) are converted with
    ``str()`` first instead of raising ``TypeError``.
    """
    cleaned = _UNSAFE_CHARS.sub('_', str(filename))
    cleaned = _UNDERSCORE_RUN.sub('_', cleaned)
    return cleaned.strip('_ ')


def build_target_path(entry):
    """Return the destination path for *entry* inside ``OUTPUT_DIR``.

    Raises ``KeyError`` if any of the ``NAME_FIELDS`` is absent.
    """
    date, case_info, court_name = (
        sanitize_filename(entry[field]) for field in NAME_FIELDS
    )
    return OUTPUT_DIR / f"{date}---{case_info}---{court_name}.pdf"


def main():
    """Copy every order listed in the database to its descriptive name."""
    # Imported here so the pure helpers above stay importable (and testable)
    # on machines where tinydb is not installed.
    from tinydb import TinyDB

    # copyfile() does not create parent directories; without this every copy
    # fails with FileNotFoundError on a fresh checkout.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    with TinyDB(DB_PATH) as db:
        entries = db.all()

    for entry in entries:
        # A single incomplete record must not abort the whole run.
        missing = [
            field
            for field in (*NAME_FIELDS, 'filename')
            if entry.get(field) is None
        ]
        if missing:
            doc_id = getattr(entry, 'doc_id', '?')
            print(f"Skipping entry {doc_id}: missing {', '.join(missing)}")
            continue

        source = entry['filename']
        newname = build_target_path(entry)

        try:
            shutil.copyfile(source, newname)
        except (OSError, TypeError, ValueError) as e:
            # Best effort: report the failure and carry on with the rest.
            print(f"Error copying {source}: {e}")


if __name__ == '__main__':
    main()