aboutsummaryrefslogtreecommitdiff
path: root/scrape_ecourtindia_v6/main.py
blob: 9d4c1936733548b247f57c46593ee43ac6085180 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import csv
from scraper import Scraper
from tinydb import TinyDB
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading

# Shared TinyDB store handed to every Scraper instance created in this module.
db = TinyDB('db.json')

# When True, drill down one extra level and emit one row per court
# establishment (4 columns); otherwise stop at court complexes (3 columns).
SCRAPE_ESTABLISHMENTS = True

class ThreadSafeCSVWriter:
    """CSV writer that is safe to share across worker threads.

    Every ``writerow`` is serialized through a lock so rows produced by
    different threads never interleave mid-row. Instances can also be
    used as a context manager to guarantee the file is closed.
    """

    def __init__(self, filename):
        # newline='' is required by the csv module; an explicit utf-8
        # encoding avoids platform-dependent defaults in the output file.
        self.file = open(filename, 'w', newline='', encoding='utf-8')
        self.writer = csv.writer(self.file)
        self.lock = threading.Lock()

    def writerow(self, row):
        """Write one CSV row atomically with respect to other threads."""
        with self.lock:
            self.writer.writerow(row)

    def close(self):
        """Close the underlying file; idempotent and lock-protected.

        Taking the lock prevents a close racing with an in-flight
        writerow() from another thread.
        """
        with self.lock:
            if not self.file.closed:
                self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False

def scrape_state_thread(state, config, csv_writer):
    """Scrape all districts/complexes (and optionally establishments) of one state.

    Runs in a worker thread. Each thread owns its own Scraper/WebDriver
    (WebDriver instances are not thread-safe) and writes rows through the
    shared thread-safe ``csv_writer``. Errors are logged per state so one
    failing state does not abort the others; the driver is always quit.
    """
    scraper = Scraper(db, config)
    try:
        # close_modal() is inside the try: if it raises, the finally
        # below still quits the driver instead of leaking the browser.
        scraper.close_modal()
        for district in scraper.scrape_districts(state):
            for cmplx in scraper.scrape_complexes(state, district):
                if SCRAPE_ESTABLISHMENTS:
                    for establishment in scraper.scrape_establishments(state, district, cmplx):
                        csv_writer.writerow([state, district, cmplx, establishment])
                else:
                    csv_writer.writerow([state, district, cmplx])
    except Exception as e:
        # Best-effort scraping: report and let sibling threads continue.
        print(f"Error scraping {state}: {e}")
    finally:
        scraper.driver.quit()

def scrape_courts():
    """Enumerate states, then scrape each state in parallel into courts.csv.

    A short-lived Scraper enumerates the states; a pool of worker threads
    (one Scraper each, see scrape_state_thread) writes rows through a
    shared thread-safe CSV writer.
    """
    config = {}

    # Temporary scraper used only to list the states.
    m = Scraper(db, config)
    try:
        m.close_modal()
        states = m.scrape_states()
    finally:
        # quit() (not close()) ends the whole browser session; close()
        # only closes the current window and leaks the driver process.
        m.driver.quit()

    csv_writer = ThreadSafeCSVWriter('courts.csv')
    try:
        # Header must match the row shape written by scrape_state_thread:
        # 4 columns when establishments are scraped, 3 otherwise.
        header = ['State', 'District', 'Complex']
        if SCRAPE_ESTABLISHMENTS:
            header.append('Establishment')
        csv_writer.writerow(header)

        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(scrape_state_thread, state, config, csv_writer)
                for state in states
            ]
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"A thread encountered an error: {e}")
    finally:
        # Ensure the CSV file is flushed/closed even if the pool raises.
        csv_writer.close()

def scrape_orders():
    """Interactively scrape orders for one user-selected court and act.

    Prompts on stdin for state/district/complex/establishment/act, then
    drives the Scraper through court selection and the acts table.
    """
    config = {}

    m = Scraper(db, config)
    try:
        m.close_modal()

        # config is shared with the Scraper by reference, so these
        # selections are visible to the select_* calls below.
        config['state'] = input('Select a state: ')
        config['district'] = input('Select a district: ')
        config['court_complex'] = input('Select a court complex: ')
        config['court_establishment'] = input('Select a court establishment: ')
        config['act'] = input('Select an act: ')

        m.select_court()
        m.goto_acts()
        m.select_act()
        m.handle_table()
    finally:
        # quit() releases the whole browser session even when a step fails.
        m.driver.quit()

if __name__ == '__main__':
    # Default entry point: bulk court listing. scrape_orders() is the
    # interactive alternative and must be invoked manually.
    scrape_courts()