about summary refs log tree commit diff
path: root/scrape_ecourtindia_v6/main.py
diff options
context:
space:
mode:
Diffstat (limited to 'scrape_ecourtindia_v6/main.py')
-rw-r--r-- scrape_ecourtindia_v6/main.py 70
1 file changed, 66 insertions, 4 deletions
diff --git a/scrape_ecourtindia_v6/main.py b/scrape_ecourtindia_v6/main.py
index c81d0b6..1cadad2 100644
--- a/scrape_ecourtindia_v6/main.py
+++ b/scrape_ecourtindia_v6/main.py
@@ -1,18 +1,80 @@
+import csv
from scraper import Scraper
from tinydb import TinyDB
-import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import threading
db = TinyDB('db.json')
-if __name__ == '__main__':
+class ThreadSafeCSVWriter:
+ def __init__(self, filename):
+ self.file = open(filename, 'w', newline='')
+ self.writer = csv.writer(self.file)
+ self.lock = threading.Lock()
+
+ def writerow(self, row):
+ with self.lock:
+ self.writer.writerow(row)
+
+ def close(self):
+ self.file.close()
+
+def scrape_state_thread(state, config, csv_writer):
+ scraper = Scraper(db, config)
+ scraper.close_modal()
+ try:
+ for district in scraper.scrape_districts(state):
+ for cmplx in scraper.scrape_complexes(state, district):
+ csv_writer.writerow([state, district, cmplx])
+ except Exception as e:
+ print(f"Error scraping {state}: {e}")
+ finally:
+ scraper.driver.quit()
+
+def scrape_courts():
config = {}
+ m = Scraper(db, config)
+ m.close_modal()
+
+ csv_writer = ThreadSafeCSVWriter('courts.csv')
+ csv_writer.writerow(['State', 'District', 'Complex'])
+
+ states = m.scrape_states()
+ m.driver.close()
+
+ with ThreadPoolExecutor(max_workers=5) as executor:
+ futures = [
+ executor.submit(scrape_state_thread, state, config, csv_writer)
+ for state in states
+ ]
+
+ for future in as_completed(futures):
+ try:
+ future.result()
+ except Exception as e:
+ print(f"A thread encountered an error: {e}")
+
+ csv_writer.close()
+
+def scrape_orders():
+ config = {}
+
+ m = Scraper(db, config)
+ m.close_modal()
+
config['state'] = input('Select a state: ')
config['district'] = input('Select a district: ')
config['court_complex'] = input('Select a court complex: ')
config['court_establishment'] = input('Select a court establishment: ')
config['act'] = input('Select an act: ')
- m = Scraper(db, config)
- m.run()
+ m.select_court()
+ m.goto_acts()
+ m.select_act()
+ m.handle_table()
+
m.driver.close()
+
+if __name__ == '__main__':
+ scrape_courts()