from app.modules.interface import Interface
from tinydb import TinyDB
from bs4 import BeautifulSoup
import csv

def scrape_cases(name, acts, section, state_code):
    """Scrape case histories for the given acts and section across every
    district, court complex, and establishment of a state. Raw results are
    stored in TinyDB, then exported as a flattened CSV."""
    db = TinyDB(f'app/outputs/{name}.json')
    interface = Interface()
    try:
        districts = interface.get_districts(state_code)
    except Exception as e:
        print(f"[ERROR] Failed to scrape districts: {e}")
        districts = []
    for dist_code, dist_name in districts:
        print(f'DISTRICT: {dist_name}')
        try:
            complexes = interface.get_complexes(state_code, dist_code)
        except Exception as e:
            print(f"[ERROR] Failed to scrape complexes for {dist_name}: {e}")
            continue
        for complex_code, complex_name in complexes:
            print(f'COMPLEX: {complex_name}')
            # A complex code may bundle several court establishments as a
            # comma-separated string; query each establishment separately.
            court_establishments = str(complex_code).split(',')
            for i, court_establishment in enumerate(court_establishments, 1):
                print(f'ESTABLISHMENT: {i}/{len(court_establishments)}')
                for act in acts:
                    try:
                        cases = interface.search_by_act(
                            state_code, dist_code, court_establishment, act, section)
                    except Exception as e:
                        print(f"[ERROR] Failed to scrape cases in complex {complex_name}: {e}")
                        continue
                    for j, case in enumerate(cases, 1):
                        print(f'CASE: {j}/{len(cases)}')
                        try:
                            case_no = case['case_no']
                            case_history = interface.case_history(
                                state_code, dist_code, court_establishment, case_no)
                        except Exception as e:
                            print(f"[ERROR] Failed to get history for case "
                                  f"{case.get('case_no', 'UNKNOWN')}: {e}")
                            continue
                        try:
                            case_history['case_no'] = case_no
                            case_history['complex_name'] = complex_name
                            db.insert(case_history)
                        except Exception as e:
                            print(f"[ERROR] Failed to store case {case_no}: {e}")
    entries = db.all()
    key_mapping = {
        'case_no': 'Case Number',
        'cino': 'CNR Number',
        'type_name': 'Case Type',
        'reg_no': 'Registration Number',
        'reg_year': 'Registration Year',
        'district_name': 'District',
        'complex_name': 'Complex Name',
        'court_name': 'Court Name',
        'dt_regis': 'Registration Date',
        'date_of_filing': 'Date of Filing',
        'date_of_decision': 'Date of Decision',
        'disp_name': 'Disposition',
        'acts': 'Acts',
        'pet_name': 'Petitioner',
        'pet_adv': 'Petitioner Advocate',
        'petparty_name': 'Petitioner Party Name',
        'res_name': 'Respondent',
        'res_adv': 'Respondent Advocate',
        'resparty_name': 'Respondent Party Name',
    }
    def parse_orders(html):
        """Extract {date, link} pairs from an order-table HTML fragment."""
        soup = BeautifulSoup(html or '', features="html.parser")
        orders = []
        for row in soup.select('table.tbl-result tbody tr'):
            cells = row.find_all('td')
            # The second cell holds the order date and the third the link to
            # the order document; skip rows that carry no link.
            if len(cells) > 2:
                link_tag = cells[2].find('a', href=True)
                if link_tag:
                    orders.append({'date': cells[1].get_text(strip=True),
                                   'link': link_tag['href']})
        return orders

    all_acts = []  # distinct act names seen across all cases (not exported)
    for entry in entries:
        entry['final_orders'] = parse_orders(entry.get('finalOrder'))
        entry['interim_orders'] = parse_orders(entry.get('interimOrder'))
        # The 'act' field is an HTML table of (act, section) rows; flatten
        # it into one newline-separated string for the CSV.
        soup = BeautifulSoup(entry.get('act', ''), 'html.parser')
        entry_acts = []
        for row in soup.select('tbody tr'):
            cells = row.find_all('td')
            if len(cells) == 2:
                act_name = cells[0].get_text(strip=True)
                act_section = cells[1].get_text(strip=True)
                if act_name not in all_acts:
                    all_acts.append(act_name)
                entry_acts.append(f"{act_name}: {act_section}")
        entry['acts'] = '\n'.join(entry_acts)
    # Pad every row to the same width so each order gets its own column;
    # default=0 keeps max() from raising on an empty database.
    max_final = max((len(entry.get('final_orders', [])) for entry in entries), default=0)
    max_interim = max((len(entry.get('interim_orders', [])) for entry in entries), default=0)
    with open(f'app/outputs/{name}.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        headers = list(key_mapping.values())
        headers += [f'Final Order {i + 1}' for i in range(max_final)]
        headers += [f'Interim Order {i + 1}' for i in range(max_interim)]
        writer.writerow(headers)
        for entry in entries:
            row = [entry.get(key, '') for key in key_mapping]
            final_orders = entry.get('final_orders', [])
            for order in final_orders:
                # Spreadsheet formula renders the date as a clickable link.
                row.append(f'=HYPERLINK("{order["link"]}", "{order["date"]}")')
            row += [''] * (max_final - len(final_orders))
            interim_orders = entry.get('interim_orders', [])
            for order in interim_orders:
                row.append(f'=HYPERLINK("{order["link"]}", "{order["date"]}")')
            row += [''] * (max_interim - len(interim_orders))
            writer.writerow(row)
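

# A minimal sketch of how scrape_cases might be invoked. All argument values
# below are hypothetical: valid state codes, act names, and section numbers
# depend on the court data exposed by app.modules.interface.Interface.
if __name__ == '__main__':
    scrape_cases(
        name='ndps_cases',    # output basename -> app/outputs/ndps_cases.{json,csv}
        acts=['NDPS Act'],    # act search terms forwarded to Interface.search_by_act
        section='8',          # section filter forwarded alongside each act
        state_code='22',      # state identifier expected by the Interface API
    )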