diff options
Diffstat (limited to 'web/app/jobs/scrape_cases.py')
-rw-r--r-- | web/app/jobs/scrape_cases.py | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/web/app/jobs/scrape_cases.py b/web/app/jobs/scrape_cases.py new file mode 100644 index 0000000..ec31f8a --- /dev/null +++ b/web/app/jobs/scrape_cases.py @@ -0,0 +1,68 @@ +from modules.interface import Interface +from tinydb import TinyDB +import time + +def scrape_cases(act, section, state_code, name=time.time_ns()): + db = TinyDB(f'{name}.json') + interface = Interface() + + def get_act_number(acts): + for act_code, act_name in acts: + if act_name == act: + return act_code + return None + try: + districts = interface.get_districts(state_code) + except Exception as e: + print(f"[ERROR] Failed to scrape districts: {e}") + districts = [] + + for dist_code, dist_name in districts: + print(f'DISTRICT: {dist_name}') + + try: + complexes = interface.get_complexes(state_code, dist_code) + except Exception as e: + print(f"[ERROR] Failed to scrape complexes for {dist_name}: {e}") + continue + + for complex_code, complex_name in complexes: + print(f'COMPLEX: {complex_name}') + + court_establishments = str(complex_code).split(',') + for i, court_establishment in enumerate(court_establishments, 1): + print(f'ESTABLISHMENT: {i}/{len(court_establishments)}') + + try: + acts = interface.get_acts(state_code, dist_code, court_establishment) + act_number = get_act_number(acts) + except Exception as e: + print(f"[ERROR] Failed to scrape acts for complex {complex_name}: {e}") + continue + + if not act_number: + continue + + try: + cases = interface.search_by_act(state_code, dist_code, court_establishment, act_number, section) + except Exception as e: + print(f"[ERROR] Failed to scrape cases in complex {complex_name}: {e}") + continue + + for j, case in enumerate(cases, 1): + print(f'CASE: {j}/{len(cases)}') + + try: + case_no = case['case_no'] + case_history = interface.case_history(state_code, dist_code, court_establishment, case_no) + except Exception as e: + print(f"[ERROR] Failed to get history for case {case.get('case_no', 'UNKNOWN')}: {e}") + continue + + try: + case_history['case_no'] = case_no + case_history['complex_name'] = complex_name + db.insert(case_history) + + except Exception as e: + print(f"[ERROR] Failed to parse orders for case {case_no}: {e}") |