diff options
author | Raghuram Subramani <raghus2247@gmail.com> | 2025-03-27 23:05:13 +0530 |
---|---|---|
committer | Raghuram Subramani <raghus2247@gmail.com> | 2025-03-27 23:05:13 +0530 |
commit | a02c8f4c8643b4b9a531e185813c5d82b6866ec0 (patch) | |
tree | a3cdd49df8412e63ac711c148df6814efa0a05e7 /scrape_ecourtindia_v6/orders_scrape_courts.py | |
parent | 7195110a466b0ed14de1b8ee4fa8d7bb79626018 (diff) |
update
Diffstat (limited to 'scrape_ecourtindia_v6/orders_scrape_courts.py')
-rw-r--r-- | scrape_ecourtindia_v6/orders_scrape_courts.py | 130 |
1 files changed, 130 insertions, 0 deletions
"""Enumerate court numbers and fetch orders through ``ScraperOrders``.

Contents of ``scrape_ecourtindia_v6/orders_scrape_courts.py``, reconstructed
from a collapsed unified diff (the file was added as new in that diff).
"""
import csv
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from time import sleep

from selenium.webdriver.common.by import By

from modules.scraper_orders import ScraperOrders


class ThreadSafeCSVWriter:
    """CSV writer whose ``writerow`` can be called from several threads.

    The instance owns the underlying file; call ``close()`` when done or use
    it as a context manager so the file is closed even on error.
    """

    def __init__(self, filename):
        """Open *filename* for writing (truncating it) and wrap it in a CSV writer."""
        self.file = open(filename, 'w', newline='')
        self.writer = csv.writer(self.file)
        # Serialises access to the shared csv.writer / file object.
        self.lock = threading.Lock()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def writerow(self, row):
        """Write one *row* under the lock and echo it to stdout."""
        with self.lock:
            self.writer.writerow(row)
            print(f'Wrote: {row}')

    def close(self):
        """Close the underlying file, waiting for any in-flight ``writerow``."""
        with self.lock:
            self.file.close()


def _dismiss_modals(scraper):
    """Poll every 0.5 s and close the page's modal until none is displayed.

    Any error while looking up or closing the modal (for example the element
    not being present) ends the polling, which matches the best-effort
    behaviour this helper replaces. Only ``Exception`` is caught so that
    ``KeyboardInterrupt`` / ``SystemExit`` still propagate.
    """
    while True:
        sleep(0.5)
        try:
            modal = scraper.driver.find_element(By.CLASS_NAME, 'modal')
            if not modal.is_displayed():
                return
            scraper.close_modal()
        except Exception:
            return


def scrape_district(state, district, csv_writer):
    """Write one ``[state, district, complex, court_number]`` row per court.

    Opens its own ``ScraperOrders`` session, walks every court complex of
    *district* and sends each court number to *csv_writer*. Scraping errors
    are printed rather than raised so one failing district does not abort the
    others; the browser is always shut down afterwards.
    """
    scraper = None
    try:
        scraper = ScraperOrders({})
        scraper.close_modal()

        scraper.select('sess_state_code', state)
        scraper.select('sess_dist_code', district)

        complexes = scraper.scrape_complexes()
        if not complexes:
            # Previously surfaced as an opaque IndexError from complexes[0].
            print(f"Error scraping district {district}: no court complexes found")
            return

        # A complex has to be selected before switching to the court-number view.
        scraper.select('court_complex_code', complexes[0])

        sleep(2)
        scraper.goto_courtnumber()

        for cmplx in complexes:
            _dismiss_modals(scraper)

            scraper.select('court_complex_code', cmplx)
            sleep(0.5)

            for court_number in scraper.get_court_numbers():
                csv_writer.writerow([state, district, cmplx, court_number])

    except Exception as e:
        print(f"Error scraping district {district}: {e}")
    finally:
        # Shut the browser down on every path, not only on success.
        if scraper is not None:
            scraper.driver.quit()


def scrape_courts(state='Uttar Pradesh', output='csv/court_numbers.csv',
                  max_workers=5):
    """Scrape the court numbers of every district of *state* into a CSV file.

    Args:
        state: Value selected in the ``sess_state_code`` dropdown.
        output: Path of the CSV file to (over)write.
        max_workers: Number of districts scraped concurrently, each in its
            own ``ScraperOrders`` session.
    """
    scraper = ScraperOrders({})
    try:
        scraper.close_modal()
        scraper.select('sess_state_code', state)
        districts = scraper.scrape_districts()
    finally:
        scraper.driver.quit()

    with ThreadSafeCSVWriter(output) as csv_writer:
        csv_writer.writerow(['State', 'District', 'Cmplx', 'Court number'])

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(scrape_district, state, district, csv_writer)
                for district in districts
            ]

            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"A thread encountered an error: {e}")


def scrape_orders(courts):
    """Search and handle orders for the court described by a row of *courts*.

    *courts* is the path of a CSV file whose rows are read as
    ``state, district, court complex, court number`` (columns 0-3).
    """
    with open(courts, newline='') as csvfile:
        for row in csv.reader(csvfile):
            print(row)
            scraper = ScraperOrders({})
            try:
                scraper.close_modal()

                scraper.select('sess_state_code', row[0])
                scraper.select('sess_dist_code', row[1])

                _dismiss_modals(scraper)

                scraper.select('court_complex_code', row[2])
                sleep(1)
                scraper.goto_courtnumber()

                scraper.select('nnjudgecode1', row[3])
                scraper.driver.find_element(By.ID, 'radBoth2').click()
                scraper.submit_search()

                scraper.parse_orders_table()
                scraper.handle_orders()
            finally:
                # Release the browser once the row is done, as scrape_district does.
                scraper.driver.quit()

            # NOTE(review): only the first row of the file is processed; this
            # unconditional break is kept from the original and looks like a
            # debugging leftover - confirm before removing.
            break


if __name__ == '__main__':
    scrape_orders('csv/2023-24_pocso.csv')