aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRaghuram Subramani <raghus2247@gmail.com>2025-05-11 19:20:58 +0530
committerRaghuram Subramani <raghus2247@gmail.com>2025-05-11 19:20:58 +0530
commita44d6029f2aed0389750ce0cd694a369450fef57 (patch)
treeb98a254b308d7fea6ddcb0ffc347d1ebeadce935
parentaf8665ad224f923bdffd26fa75e7d4a8714b976c (diff)
update
-rw-r--r--web/app/job_manager.py14
-rw-r--r--web/app/jobs/scrape_cases.py40
-rw-r--r--web/app/main.py8
-rw-r--r--web/app/templates/home.html5
4 files changed, 36 insertions, 31 deletions
diff --git a/web/app/job_manager.py b/web/app/job_manager.py
index abec11d..fa486f3 100644
--- a/web/app/job_manager.py
+++ b/web/app/job_manager.py
@@ -8,18 +8,22 @@ class JobManager:
self.redis = Redis()
self.q = Queue(connection=self.redis)
- def enqueue_scrape(self, name, acts, section, state_code):
+ def enqueue_scrape(self, name, acts, sections, state_code):
# 4 hour timeout
return self.q.enqueue(
scrape_cases,
name,
acts,
- section,
+ sections,
state_code,
job_timeout=14400
)
- def get_started_jobs(self):
+ def get_jobs(self):
started_job_ids = self.q.started_job_registry.get_job_ids()
- jobs = [Job.fetch(job_id, connection=self.redis) for job_id in started_job_ids]
- return jobs
+ started_jobs = [Job.fetch(job_id, connection=self.redis) for job_id in started_job_ids]
+
+ finished_job_ids = self.q.finished_job_registry.get_job_ids()
+ finished_jobs = [Job.fetch(job_id, connection=self.redis) for job_id in finished_job_ids]
+
+ return started_jobs + finished_jobs
diff --git a/web/app/jobs/scrape_cases.py b/web/app/jobs/scrape_cases.py
index 9cd4930..7a944f1 100644
--- a/web/app/jobs/scrape_cases.py
+++ b/web/app/jobs/scrape_cases.py
@@ -4,7 +4,8 @@ from bs4 import BeautifulSoup
import time
import csv
-def scrape_cases(name, acts, section, state_code):
+def scrape_cases(name, acts, sections, state_code):
+ acts = set(acts)
db = TinyDB(f'app/outputs/{name}.json')
interface = Interface()
@@ -31,29 +32,30 @@ def scrape_cases(name, acts, section, state_code):
print(f'ESTABLISHMENT: {i}/{len(court_establishments)}')
for act in acts:
- try:
- cases = interface.search_by_act(state_code, dist_code, court_establishment, act, section)
- except Exception as e:
- print(f"[ERROR] Failed to scrape cases in complex {complex_name}: {e}")
- continue
-
- for j, case in enumerate(cases, 1):
- print(f'CASE: {j}/{len(cases)}')
-
+ for section in sections:
try:
- case_no = case['case_no']
- case_history = interface.case_history(state_code, dist_code, court_establishment, case_no)
+ cases = interface.search_by_act(state_code, dist_code, court_establishment, act, section)
except Exception as e:
- print(f"[ERROR] Failed to get history for case {case.get('case_no', 'UNKNOWN')}: {e}")
+ print(f"[ERROR] Failed to scrape cases in complex {complex_name}: {e}")
continue
- try:
- case_history['case_no'] = case_no
- case_history['complex_name'] = complex_name
- db.insert(case_history)
+ for j, case in enumerate(cases, 1):
+ print(f'CASE: {j}/{len(cases)}')
- except Exception as e:
- print(f"[ERROR] Failed to parse orders for case {case_no}: {e}")
+ try:
+ case_no = case['case_no']
+ case_history = interface.case_history(state_code, dist_code, court_establishment, case_no)
+ except Exception as e:
+ print(f"[ERROR] Failed to get history for case {case.get('case_no', 'UNKNOWN')}: {e}")
+ continue
+
+ try:
+ case_history['case_no'] = case_no
+ case_history['complex_name'] = complex_name
+ db.insert(case_history)
+
+ except Exception as e:
+ print(f"[ERROR] Failed to parse orders for case {case_no}: {e}")
entries = db.all()
diff --git a/web/app/main.py b/web/app/main.py
index cc3f995..1266cb8 100644
--- a/web/app/main.py
+++ b/web/app/main.py
@@ -21,7 +21,7 @@ main = Blueprint('main', __name__)
@main.route('/')
@login_required
def home():
- jobs = job_manager.get_started_jobs()
+ jobs = job_manager.get_jobs()
return render_template('home.html', user=current_user, states=states, acts=act_list, jobs=jobs)
@main.route('/logout')
@@ -57,14 +57,14 @@ def create_user():
@login_required
def enqueue_job():
acts = request.form.getlist('act')
- section = request.form.get('section')
+ sections = (request.form.get('section') or '').split(',')
state_code = request.form.get('state_code')
name = request.form.get('name')
- if not section:
-     section = ''
- job = job_manager.enqueue_scrape(f'{name} - {time.time_ns()}', acts, section, state_code)
+ job = job_manager.enqueue_scrape(f'{name} - {time.time_ns()}', acts, sections, state_code)
flash('Job created.', 'info')
return redirect(url_for('main.home'))
@@ -72,5 +72,5 @@ def enqueue_job():
@main.route('/download/<filename>')
@login_required
def download_output(filename):
- output_dir = os.path.join(os.getcwd(), 'outputs')
+ output_dir = os.path.join(os.getcwd(), 'app/outputs')
return send_from_directory(output_dir, f'{filename}.csv', as_attachment=True)
diff --git a/web/app/templates/home.html b/web/app/templates/home.html
index 809269d..d35767a 100644
--- a/web/app/templates/home.html
+++ b/web/app/templates/home.html
@@ -30,7 +30,8 @@
{% endfor %}
</select>
- <input type="text" name="section" placeholder="Section">
+ <label for="act">Comma Separated Section List</label>
+ <input type="text" name="section" placeholder="Sections">
<select name="state_code">
{% for code, name in states %}
@@ -56,7 +57,6 @@
<th scope="col">Job Name</th>
<th scope="col">Job Status</th>
<th scope="col">Output</th>
- <th scope="col">Log</th>
</tr>
</thead>
<tbody>
@@ -65,7 +65,6 @@
<td>{{ job.args[0] }}</td>
<td>{{ job._status }}</td>
<td><a href="{{ url_for('main.download_output', filename=job.args[0]) }}">Download</a></td>
- <td>Running</td>
</tr>
{% endfor %}
</tbody>