aboutsummaryrefslogtreecommitdiff
path: root/scrape_ecourtindia_v6/modules/scraper_case_status.py
diff options
context:
space:
mode:
Diffstat (limited to 'scrape_ecourtindia_v6/modules/scraper_case_status.py')
-rw-r--r-- scrape_ecourtindia_v6/modules/scraper_case_status.py 156
1 files changed, 156 insertions, 0 deletions
diff --git a/scrape_ecourtindia_v6/modules/scraper_case_status.py b/scrape_ecourtindia_v6/modules/scraper_case_status.py
new file mode 100644
index 0000000..d9b925d
--- /dev/null
+++ b/scrape_ecourtindia_v6/modules/scraper_case_status.py
@@ -0,0 +1,156 @@
+from time import sleep
+import os
+import uuid
+
+from urllib import request
+
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.select import Select
+
+from bs4 import BeautifulSoup
+
+import cv2
+import pytesseract
+import tempfile
+
+from tinydb import TinyDB
+
+from .scraper import Scraper
+
class ScraperCaseStatus(Scraper):
    """Scrape the eCourts "case status" page, searching by act.

    For every result row the case detail HTML is saved under ``html/``, any
    linked order PDFs are saved under ``pdf/``, and one record describing the
    case is inserted into the TinyDB file ``db.json``.

    ``self.driver``, ``self.select`` and ``self.close_modal`` are not defined
    in this class; they are presumably provided by ``Scraper`` -- confirm
    against the base class.
    """

    def __init__(self, config):
        """Open the case-status page and remember the search configuration.

        Args:
            config: Mapping read by the other methods with the keys ``'act'``,
                ``'state'``, ``'district'``, ``'court_complex'`` and
                ``'court_establishment'``.
        """
        super().__init__('https://services.ecourts.gov.in/ecourtindia_v6/?p=casestatus/index')

        self.db = TinyDB('db.json')
        self.config = config

    @staticmethod
    def _unique_path(directory, extension):
        """Return an unused ``<directory>/<random hex>.<extension>`` path.

        The directory is created when missing so the caller can open the
        returned path for writing straight away.
        """
        os.makedirs(directory, exist_ok=True)
        while True:
            candidate = f"{directory}/{uuid.uuid4().hex}.{extension}"
            if not os.path.exists(candidate):
                return candidate

    def select_act(self):
        """Choose the configured act, restrict to disposed cases and search."""
        self.select('actcode', self.config['act'])
        sleep(1)

        # Disposed only
        self.driver.find_element(By.ID, 'radDAct').click()
        self.submit_search()

    def select_court(self):
        """Select state, district, court complex and court establishment.

        The first three selections are repeated for as long as the
        ``alert-danger-cust`` element is displayed afterwards; that alert is
        closed before each retry.
        """
        sleep(2)
        while True:
            self.select('sess_state_code', self.config['state'])
            self.select('sess_dist_code', self.config['district'])
            self.select('court_complex_code', self.config['court_complex'])

            sleep(2)
            alert = self.driver.find_element(By.CLASS_NAME, 'alert-danger-cust')
            if not alert.is_displayed():
                break
            self.close_modal()

        self.select('court_est_code', self.config['court_establishment'])

    def goto_acts(self):
        """Click the ``act-tabMenu`` element and wait one second."""
        self.driver.find_element(By.ID, 'act-tabMenu').click()
        sleep(1)

    def submit_search(self):
        """OCR the captcha and submit the act search until it is accepted.

        Each attempt screenshots the captcha image, runs it through
        pytesseract, types the result and calls the page's ``submitAct()``.
        An attempt counts as failed when the ``alert-danger-cust`` element is
        displayed afterwards; the alert is then closed, the input cleared and
        the loop retries. There is no upper bound on the number of attempts.
        """
        while True:
            sleep(2)
            captcha = self.driver.find_element(By.ID, 'captcha_image')

            # The context manager closes (and thereby removes) the temporary
            # file once the image is loaded into memory, instead of leaking
            # one open file per attempt.
            with tempfile.NamedTemporaryFile(suffix='.png') as temp:
                captcha.screenshot(temp.name)
                img = cv2.imread(temp.name)

            text = pytesseract.image_to_string(img).strip()

            element = self.driver.find_element(By.ID, 'act_captcha_code')
            element.send_keys(text)

            self.driver.execute_script('submitAct()')
            sleep(3)

            if not self.driver.find_element(By.CLASS_NAME, 'alert-danger-cust').is_displayed():
                return

            self.close_modal()
            element.clear()

    def handle_table(self):
        """Visit every case in the ``dispTable`` results and store it.

        All ``<td>`` cells of the table are flattened into ``self.rows``.
        Starting at index 5 and stepping by 4, the cell at ``i`` holds the
        link whose ``onclick`` opens the case, ``i - 2`` the case info and
        ``i - 1`` the petitioner/respondent text. For each case the ``CSact``
        HTML is written to a fresh file under ``html/``, order PDFs are
        fetched via ``parse_orders_table`` and the record is inserted into
        the database before navigating back.
        """
        table_innerhtml = self.driver.find_element(By.ID, 'dispTable').get_attribute('innerHTML')
        self.rows = BeautifulSoup(str(table_innerhtml), 'html.parser').find_all('td')
        self.views = []

        for i in range(5, len(self.rows), 4):
            view = self.rows[i]
            parties = self.rows[i - 1].get_text(strip=True)

            self.current_view = {
                'case_info': self.rows[i - 2].get_text(strip=True),
                # Pad the separator with spaces; same result as split + join.
                'petitioner_respondent': parties.replace('Vs', ' Vs '),
                'htmlfile': '',
                'pdfs': []
            }

            script = view.find_all('a')[0].get_attribute_list('onclick')[0]
            self.driver.execute_script(script)
            sleep(1)

            html = str(self.driver.find_element(By.ID, 'CSact').get_attribute('innerHTML'))

            filename = self._unique_path('html', 'html')
            self.current_view['htmlfile'] = filename
            with open(filename, "w", encoding="utf-8") as f:
                f.write(html)

            self.parse_orders_table()

            self.db.insert(self.current_view)
            print(f'INSERTED: {self.current_view}')
            self.driver.find_element(By.ID, 'main_back_act').click()

    def parse_orders_table(self):
        """Collect the order cells of the open case and download their PDFs.

        Returns without doing anything when the ``order_table`` element
        cannot be read. Otherwise every third ``<td>`` starting at index 5 is
        stored in ``self.orders`` and handed to ``handle_orders``.
        """
        try:
            table_innerhtml = self.driver.find_element(By.CLASS_NAME, 'order_table').get_attribute('innerHTML')
        except Exception:
            # Best effort: a case without an orders table is simply skipped.
            # ``Exception`` (not a bare except) lets Ctrl-C still interrupt.
            return

        rows = BeautifulSoup(str(table_innerhtml), 'html.parser').find_all('td')
        self.orders = rows[5::3]

        self.handle_orders()

    def handle_orders(self):
        """Download the PDF behind every cell in ``self.orders``.

        For each cell the link's ``onclick`` script is executed, the ``data``
        attribute of the first ``<object>`` element is taken as the PDF URL
        and the file is fetched with the browser's cookies. The saved path is
        appended to ``self.current_view['pdfs']`` only when the download
        succeeded; failures are reported on stdout and do not stop the run.
        The orders modal is closed after every attempt.
        """
        for order in self.orders:
            script = order.find_all('a')[0].get_attribute_list('onclick')[0]
            self.driver.execute_script(script)

            sleep(2)
            obj = self.driver.find_element(By.TAG_NAME, 'object')
            pdf_url = str(obj.get_attribute('data'))

            filename = self._unique_path('pdf', 'pdf')
            cookies = "; ".join(f"{c['name']}={c['value']}" for c in self.driver.get_cookies())

            try:
                # Request() itself raises ValueError for a URL without a
                # scheme (e.g. a missing ``data`` attribute), so it has to
                # sit inside the try as well.
                r = request.Request(pdf_url)
                r.add_header("Cookie", cookies)
                with request.urlopen(r) as response:
                    data = response.read()
                with open(filename, "wb") as file:
                    file.write(data)
            except Exception:
                print(f'UNABLE TO FETCH PDF: {pdf_url}')
            else:
                # Record the path only once the file really exists on disk.
                self.current_view['pdfs'].append(filename)

            # NOTE(review): 'modalOders' is kept exactly as in the original
            # code; presumably it matches the element id used by the site.
            self.driver.find_element(By.ID, 'modalOders').find_element(By.CLASS_NAME, 'btn-close').click()