about summary refs log tree commit diff
path: root/scrape_ecourtindia_v6/main.py
diff options
context:
space:
mode:
author    Raghuram Subramani <raghus2247@gmail.com>  2025-03-26 22:19:19 +0530
committer Raghuram Subramani <raghus2247@gmail.com>  2025-03-26 22:19:19 +0530
commit    f1f43d3448bc879eed55f1e6865c06e646b7eb4a (patch)
tree      3b67eefca59ffc4bd46b418ec401a3c36b753542 /scrape_ecourtindia_v6/main.py
parent    ef63d21480f1f83a660902da3f9ad2d5606b37c2 (diff)
implement scraping of establishments
Diffstat (limited to 'scrape_ecourtindia_v6/main.py')
-rw-r--r--  scrape_ecourtindia_v6/main.py  10
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/scrape_ecourtindia_v6/main.py b/scrape_ecourtindia_v6/main.py
index 1cadad2..9d4c193 100644
--- a/scrape_ecourtindia_v6/main.py
+++ b/scrape_ecourtindia_v6/main.py
@@ -6,6 +6,8 @@ import threading
db = TinyDB('db.json')
+SCRAPE_ESTABLISHMENTS = True
+
class ThreadSafeCSVWriter:
def __init__(self, filename):
self.file = open(filename, 'w', newline='')
@@ -25,7 +27,11 @@ def scrape_state_thread(state, config, csv_writer):
try:
for district in scraper.scrape_districts(state):
for cmplx in scraper.scrape_complexes(state, district):
- csv_writer.writerow([state, district, cmplx])
+ if SCRAPE_ESTABLISHMENTS:
+ for establishment in scraper.scrape_establishments(state, district, cmplx):
+ csv_writer.writerow([ state, district, cmplx, establishment ])
+ else:
+ csv_writer.writerow([ state, district, cmplx ])
except Exception as e:
print(f"Error scraping {state}: {e}")
finally:
@@ -43,7 +49,7 @@ def scrape_courts():
states = m.scrape_states()
m.driver.close()
- with ThreadPoolExecutor(max_workers=5) as executor:
+ with ThreadPoolExecutor(max_workers=10) as executor:
futures = [
executor.submit(scrape_state_thread, state, config, csv_writer)
for state in states