about summary refs log tree commit diff
path: root/scrape_ecourtindia_v6/scraper.py
diff options
context:
space:
mode:
author Raghuram Subramani <raghus2247@gmail.com> 2025-03-26 22:06:32 +0530
committer Raghuram Subramani <raghus2247@gmail.com> 2025-03-26 22:06:32 +0530
commit ef63d21480f1f83a660902da3f9ad2d5606b37c2 (patch)
tree 322b1d1e8da88a62e1cfd4b0c767f53d3460203d /scrape_ecourtindia_v6/scraper.py
parent 24b38a94e36794e33a1a432ef00eaf0c46957124 (diff)
multi-threaded, headless scraper
Diffstat (limited to 'scrape_ecourtindia_v6/scraper.py')
-rw-r--r-- scrape_ecourtindia_v6/scraper.py 51
1 files changed, 42 insertions, 9 deletions
diff --git a/scrape_ecourtindia_v6/scraper.py b/scrape_ecourtindia_v6/scraper.py
index 69d3336..cdab2fd 100644
--- a/scrape_ecourtindia_v6/scraper.py
+++ b/scrape_ecourtindia_v6/scraper.py
@@ -6,6 +6,7 @@ from urllib import request
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
+from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.select import Select
from bs4 import BeautifulSoup
@@ -19,17 +20,14 @@ class Scraper:
self.db = db
self.config = config
- self.driver = Firefox()
+ options = Options()
+ options.add_argument("--headless")
+
+ self.driver = Firefox(options=options)
self.driver.get('https://services.ecourts.gov.in/ecourtindia_v6/?p=casestatus/index')
self.current_view = {}
- def run(self):
- self.close_modal()
- self.goto_acts()
- self.select_act()
- self.handle_table()
-
def close_modal(self):
sleep(3)
self.driver.execute_script('closeModel({modal_id:"validateError"})')
@@ -50,7 +48,42 @@ class Scraper:
self.driver.find_element(By.ID, 'radDAct').click()
self.submit_search()
- def goto_acts(self):
+ def scrape_states(self):
+ element = self.driver.find_element(By.ID, 'sess_state_code')
+ options = Select(element).options
+ states = [ option.text for option in options[1:] ]
+ print(f'STATES: {states}')
+
+ sleep(0.2)
+
+ return states
+
+ def scrape_districts(self, state):
+ self.select('sess_state_code', state)
+ sleep(0.2)
+
+ element = self.driver.find_element(By.ID, 'sess_dist_code')
+ options = Select(element).options
+ districts = [ option.text for option in options[1:] ]
+ print(f'DISTRICTS: {districts}')
+
+ return districts
+
+ def scrape_complexes(self, state, district):
+ self.select('sess_state_code', state)
+ sleep(0.2)
+ self.select('sess_dist_code', district)
+ sleep(0.2)
+
+ element = self.driver.find_element(By.ID, 'court_complex_code')
+ options = Select(element).options
+ complexes = [ option.text for option in options[1:] ]
+ print(f'COMPLEXES: {complexes}')
+
+ return complexes
+
+ def select_court(self):
+ sleep(2)
while True:
self.select('sess_state_code', self.config['state'])
self.select('sess_dist_code', self.config['district'])
@@ -66,7 +99,7 @@ class Scraper:
self.select('court_est_code', self.config['court_establishment'])
- sleep(1)
+ def goto_acts(self):
element = self.driver.find_element(By.ID, 'act-tabMenu')
element.click()
sleep(1)