diff options
| author | Raghuram Subramani <raghus2247@gmail.com> | 2025-03-26 22:19:19 +0530 |
|---|---|---|
| committer | Raghuram Subramani <raghus2247@gmail.com> | 2025-03-26 22:19:19 +0530 |
| commit | f1f43d3448bc879eed55f1e6865c06e646b7eb4a (patch) | |
| tree | 3b67eefca59ffc4bd46b418ec401a3c36b753542 /scrape_ecourtindia_v6/main.py | |
| parent | ef63d21480f1f83a660902da3f9ad2d5606b37c2 (diff) | |
implement scraping of establishments
Diffstat (limited to 'scrape_ecourtindia_v6/main.py')
| -rw-r--r-- | scrape_ecourtindia_v6/main.py | 10 |
1 file changed, 8 insertions, 2 deletions
diff --git a/scrape_ecourtindia_v6/main.py b/scrape_ecourtindia_v6/main.py index 1cadad2..9d4c193 100644 --- a/scrape_ecourtindia_v6/main.py +++ b/scrape_ecourtindia_v6/main.py @@ -6,6 +6,8 @@ import threading db = TinyDB('db.json') +SCRAPE_ESTABLISHMENTS = True + class ThreadSafeCSVWriter: def __init__(self, filename): self.file = open(filename, 'w', newline='') @@ -25,7 +27,11 @@ def scrape_state_thread(state, config, csv_writer): try: for district in scraper.scrape_districts(state): for cmplx in scraper.scrape_complexes(state, district): - csv_writer.writerow([state, district, cmplx]) + if SCRAPE_ESTABLISHMENTS: + for establishment in scraper.scrape_establishments(state, district, cmplx): + csv_writer.writerow([ state, district, cmplx, establishment ]) + else: + csv_writer.writerow([ state, district, cmplx ]) except Exception as e: print(f"Error scraping {state}: {e}") finally: @@ -43,7 +49,7 @@ def scrape_courts(): states = m.scrape_states() m.driver.close() - with ThreadPoolExecutor(max_workers=5) as executor: + with ThreadPoolExecutor(max_workers=10) as executor: futures = [ executor.submit(scrape_state_thread, state, config, csv_writer) for state in states |
