Diffstat (limited to 'scrape_ecourtindia_v6/scrape_orders.py')
| -rw-r--r-- | scrape_ecourtindia_v6/scrape_orders.py | 113 |
1 file changed, 46 insertions, 67 deletions
diff --git a/scrape_ecourtindia_v6/scrape_orders.py b/scrape_ecourtindia_v6/scrape_orders.py
index 74cdd56..54a2d80 100644
--- a/scrape_ecourtindia_v6/scrape_orders.py
+++ b/scrape_ecourtindia_v6/scrape_orders.py
@@ -1,82 +1,71 @@
 import csv
 from time import sleep
+
+from tinydb import TinyDB
 from modules.scraper_orders import ScraperOrders
 from selenium.webdriver.common.by import By
 from concurrent.futures import ThreadPoolExecutor, as_completed
+
 import threading
 
-class ThreadSafeCSVWriter:
-    def __init__(self, filename):
-        self.file = open(filename, 'w', newline='')
-        self.writer = csv.writer(self.file)
+class ThreadSafeDB:
+    def __init__(self):
+        self.db = TinyDB('orders.json')
         self.lock = threading.Lock()
-
-    def writerow(self, row):
+
+    def insert(self, record):
         with self.lock:
-            self.writer.writerow(row)
-            print(f'Wrote: {row}')
+            self.db.insert(record)
+            print(f'INSERTED: {record}')
 
-    def close(self):
-        self.file.close()
+db = ThreadSafeDB()
 
 
-def scrape_district(state, district, csv_writer):
+def scrape_single_court(row):
     try:
         config = {}
-        scraper = ScraperOrders(config)
+        scraper = ScraperOrders(db, config)
         scraper.close_modal()
-        scraper.select('sess_state_code', state)
-        scraper.select('sess_dist_code', district)
-
-        complexes = scraper.scrape_complexes()
-        scraper.select('court_complex_code', complexes[0])
-
-        sleep(2)
-        scraper.goto_courtnumber()
-
-        for cmplx in complexes:
-            while True:
-                sleep(0.5)
-                try:
-                    modal_is_open = scraper.driver.find_element(By.CLASS_NAME, 'modal').is_displayed()
-                    if modal_is_open:
-                        scraper.close_modal()
-                        continue
-                    break
-                except:
-                    break
-
-            scraper.select('court_complex_code', cmplx)
+        scraper.select('sess_state_code', row[0])
+        scraper.select('sess_dist_code', row[1])
+
+        while True:
             sleep(0.5)
-
-            court_numbers = scraper.get_court_numbers()
-            for court_number in court_numbers:
-                row = [state, district, cmplx, court_number]
-                csv_writer.writerow(row)
+            try:
+                modal_is_open = scraper.driver.find_element(By.CLASS_NAME, 'modal').is_displayed()
+                if modal_is_open:
+                    scraper.close_modal()
+                    continue
+                break
+            except:
+                break
+
+        scraper.select('court_complex_code', row[2])
+        sleep(1)
+        scraper.goto_courtnumber()
+        scraper.select('nnjudgecode1', row[3])
+
+        scraper.driver.find_element(By.ID, 'radBoth2').click()
+
+        scraper.submit_search()
+        scraper.parse_orders_table()
+        scraper.handle_orders()
 
         scraper.driver.quit()
     except Exception as e:
-        print(f"Error scraping district {district}: {e}")
+        print(f"Error processing court {row}: {e}")
 
 
-def scrape_courts():
-    state = 'Uttar Pradesh'
-
-    config = {}
-    scraper = ScraperOrders(config)
-    scraper.close_modal()
-    scraper.select('sess_state_code', state)
-
-    districts = scraper.scrape_districts()
-    scraper.driver.quit()
-
-    csv_writer = ThreadSafeCSVWriter('csv/court_numbers.csv')
-    csv_writer.writerow(['State', 'District', 'Cmplx', 'Court number'])
+def scrape_orders(courts_csv):
+    with open(courts_csv, newline='') as csvfile:
+        reader = csv.reader(csvfile)
+        next(reader, None)
+        courts = list(reader)
 
     with ThreadPoolExecutor(max_workers=5) as executor:
         futures = [
-            executor.submit(scrape_district, state, district, csv_writer)
-            for district in districts
+            executor.submit(scrape_single_court, court)
+            for court in courts
        ]
 
         for future in as_completed(futures):
@@ -85,16 +74,6 @@ def scrape_courts():
             except Exception as e:
                 print(f"A thread encountered an error: {e}")
 
-    csv_writer.close()
-
-def scrape_orders(courts):
-    csvfile = open(courts, newline='')
-    reader = csv.reader(csvfile)
-
-    for row in reader:
-        print(row)
-
-    csvfile.close()
-
 if __name__ == '__main__':
-    scrape_orders('csv/2023-24_pocso.csv')
+    input_file = 'csv/2023-24_pocso.csv'
+    scrape_orders(input_file)
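The core change above swaps the per-district CSV writer for a single TinyDB handle guarded by a lock and shared across the ThreadPoolExecutor workers, since TinyDB does not document its storage as thread-safe. A minimal standalone sketch of that pattern follows; the file name 'demo.json' and the record shape are illustrative, not taken from this repository:

import threading
from concurrent.futures import ThreadPoolExecutor

from tinydb import TinyDB  # pip install tinydb


class ThreadSafeDB:
    """Serialize all writes to one TinyDB file through a single lock."""

    def __init__(self, path):
        self.db = TinyDB(path)
        self.lock = threading.Lock()

    def insert(self, record):
        # Only one worker thread may touch the underlying JSON file at a time.
        with self.lock:
            self.db.insert(record)


if __name__ == '__main__':
    db = ThreadSafeDB('demo.json')  # hypothetical output file
    with ThreadPoolExecutor(max_workers=5) as executor:
        for i in range(10):
            executor.submit(db.insert, {'court': i})

A module-level lock like this trades some throughput for simplicity: every insert is serialized, which is acceptable here because the scraping threads spend most of their time waiting on Selenium, not on the database.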
