about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Raghuram Subramani <raghus2247@gmail.com> 2025-03-27 23:20:24 +0530
committer Raghuram Subramani <raghus2247@gmail.com> 2025-03-27 23:20:24 +0530
commit 959c5e6eaf0b5879c8277fe58685d28ec94f98d0 (patch)
tree b24ac310ecca48f1f9717adcb632848600feb52e
parent 6d02f36c2a45b0adcf735d2fc7a7d122a66eea29 (diff)
clean
-rw-r--r-- scrape_ecourtindia_v6/.gitignore 1
-rwxr-xr-x scrape_ecourtindia_v6/clean.sh 2
-rw-r--r-- scrape_ecourtindia_v6/requirements.txt 3
-rw-r--r-- scrape_ecourtindia_v6/run.sh 3
-rw-r--r-- scrape_ecourtindia_v6/scrape_orders.py 4
-rw-r--r-- scrape_ecourtindia_v6/templates/index.html 40
-rw-r--r-- scrape_ecourtindia_v6/web.py 82
7 files changed, 71 insertions, 64 deletions
diff --git a/scrape_ecourtindia_v6/.gitignore b/scrape_ecourtindia_v6/.gitignore
index 62236f3..f390c7e 100644
--- a/scrape_ecourtindia_v6/.gitignore
+++ b/scrape_ecourtindia_v6/.gitignore
@@ -1,2 +1,3 @@
courts.csv
csv/*
+orders.json
diff --git a/scrape_ecourtindia_v6/clean.sh b/scrape_ecourtindia_v6/clean.sh
index 8c8a0ab..a38f202 100755
--- a/scrape_ecourtindia_v6/clean.sh
+++ b/scrape_ecourtindia_v6/clean.sh
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
-rm -r html/* pdf/* db.json
+rm -rf html/* pdf/* *.json
mkdir -p html pdf
diff --git a/scrape_ecourtindia_v6/requirements.txt b/scrape_ecourtindia_v6/requirements.txt
deleted file mode 100644
index 78bea83..0000000
--- a/scrape_ecourtindia_v6/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-selenium
-opencv-python
-pytesseract
diff --git a/scrape_ecourtindia_v6/run.sh b/scrape_ecourtindia_v6/run.sh
deleted file mode 100644
index de47eaf..0000000
--- a/scrape_ecourtindia_v6/run.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env bash
-
-uvicorn web:app --reload
diff --git a/scrape_ecourtindia_v6/scrape_orders.py b/scrape_ecourtindia_v6/scrape_orders.py
index b0dc311..146119e 100644
--- a/scrape_ecourtindia_v6/scrape_orders.py
+++ b/scrape_ecourtindia_v6/scrape_orders.py
@@ -43,8 +43,9 @@ def scrape_single_court(row):
scraper.select('court_complex_code', row[2])
sleep(1)
scraper.goto_courtnumber()
- sleep(0.6)
+ sleep(1)
scraper.select('nnjudgecode1', row[3])
+ sleep(1)
scraper.driver.find_element(By.ID, 'radBoth2').click()
@@ -60,7 +61,6 @@ def scrape_single_court(row):
def scrape_orders(courts_csv):
with open(courts_csv, newline='') as csvfile:
reader = csv.reader(csvfile)
- next(reader, None)
courts = list(reader)
with ThreadPoolExecutor(max_workers=5) as executor:
diff --git a/scrape_ecourtindia_v6/templates/index.html b/scrape_ecourtindia_v6/templates/index.html
deleted file mode 100644
index 0b01b77..0000000
--- a/scrape_ecourtindia_v6/templates/index.html
+++ /dev/null
@@ -1,40 +0,0 @@
-<html>
-<head>
- <title>Index</title>
- <link
- rel="stylesheet"
- href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css"
- >
- <meta charset="utf-8">
- <meta name="viewport" content="width=device-width, initial-scale=1">
- <meta name="color-scheme" content="light dark">
-</head>
-<body>
- <main class="container">
- <table>
- <thead>
- <tr>
- <th scope="col">Case Info</th>
- <th scope="col">Petitioner/Respondent</th>
- <th scope="col">HTML File</th>
- <th scope="col">Orders</th>
- </tr>
- </thead>
- <tbody>
- {% for view in views %}
- <tr>
- <th scope="row">{{ view.case_info }}</th>
- <td>{{ view.petitioner_respondent }}</td>
- <td><a href='{{ view.htmlfile }}'>Open</a></td>
- <td>
- {% for pdf in view.pdfs %}
- <a href='{{ pdf }}'>Open</a>
- {% endfor %}
- </td>
- </tr>
- {% endfor %}
- </tbody>
- </table>
- </main>
-</body>
-</html>
diff --git a/scrape_ecourtindia_v6/web.py b/scrape_ecourtindia_v6/web.py
index 195b81f..a0bf0b0 100644
--- a/scrape_ecourtindia_v6/web.py
+++ b/scrape_ecourtindia_v6/web.py
@@ -1,20 +1,72 @@
-from tinydb import TinyDB
+import os
+from flask import Flask, send_from_directory, abort
-from fastapi import FastAPI, Request
-from fastapi.responses import HTMLResponse
-from fastapi.staticfiles import StaticFiles
-from fastapi.templating import Jinja2Templates
+app = Flask(__name__)
-db = TinyDB('db.json')
-app = FastAPI()
+# Directory where PDFs are stored
+PDF_DIRECTORY = './pdf'
-app.mount("/html", StaticFiles(directory="html"), name="html")
-app.mount("/pdf", StaticFiles(directory="pdf"), name="pdf")
+@app.route('/pdf/<filename>')
+def view_pdf(filename):
+ """
+ Route to view a PDF file from the specified directory.
+
+ Args:
+ filename (str): Name of the PDF file to display
+
+ Returns:
+ PDF file or 404 error if file doesn't exist
+ """
+ try:
+ # Ensure the filename is safe and exists
+ if not filename.endswith('.pdf'):
+ abort(400, description="Invalid file type. Only PDF files are allowed.")
+
+ # Check if the file exists in the PDF directory
+ filepath = os.path.join(PDF_DIRECTORY, filename)
+ if not os.path.exists(filepath):
+ abort(404, description="PDF file not found")
+
+ # Send the PDF file
+ return send_from_directory(PDF_DIRECTORY, filename, as_attachment=False)
+
+ except Exception as e:
+ abort(500, description=f"Internal server error: {str(e)}")
-templates = Jinja2Templates(directory="templates")
+@app.route('/pdf')
+def list_pdfs():
+ """
+ Route to list all available PDF files in the directory.
+
+ Returns:
+ HTML page with list of PDFs or error message
+ """
+ try:
+ # Get list of PDF files in the directory
+ pdf_files = [f for f in os.listdir(PDF_DIRECTORY) if f.endswith('.pdf')]
+
+ # Create a simple HTML response with links to PDFs
+ pdf_links = "\n".join([
+ f'<li><a href="/pdf/{file}">{file}</a></li>'
+ for file in pdf_files
+ ])
+
+ return f"""
+ <html>
+ <head><title>PDF Viewer</title></head>
+ <body>
+ <h1>Available PDFs</h1>
+ <ul>{pdf_links}</ul>
+ </body>
+ </html>
+ """
+
+ except Exception as e:
+ abort(500, description=f"Error listing PDFs: {str(e)}")
-@app.get("/", response_class=HTMLResponse)
-async def index(request: Request):
- return templates.TemplateResponse(
- request=request, name="index.html", context={ 'views': db.all() }
- )
+if __name__ == '__main__':
+ # Ensure PDF directory exists
+ os.makedirs(PDF_DIRECTORY, exist_ok=True)
+
+ # Run the Flask app
+ app.run(host='0.0.0.0', port=8000, debug=True)