diff --git a/.gitignore b/.gitignore index 5aaa73e..c2487a0 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,6 @@ yarn-error.log* # typescript *.tsbuildinfo next-env.d.ts + +# sqlite +*.db diff --git a/README.md b/README.md index f781554..380ec86 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Adanomad Online Assessment +# PDF Highlighter ## Project Overview @@ -14,6 +14,7 @@ This project is a PDF viewer and keyword search application developed as part of - Text highlighting for search matches - Sidebar for search results and navigation - Responsive design for various screen sizes +- Persistent storage of highlights using SQLite or Supabase ## Technologies Used @@ -21,8 +22,9 @@ This project is a PDF viewer and keyword search application developed as part of - React - TypeScript - react-pdf library for PDF rendering -- Tailwind CSS for styling -- Custom highlight storage solution +- Tailwind CSS for styling +- SQLite for local highlight storage +- Supabase for cloud-based highlight storage (optional) ## Getting Started @@ -33,43 +35,11 @@ This project is a PDF viewer and keyword search application developed as part of ## Project Structure -``` -. -├── app -│ ├── components -│ │ ├── App.tsx -│ │ ├── Button.tsx -│ │ ├── HighlightPopup.tsx -│ │ ├── Input.tsx -│ │ ├── KeywordSearch.tsx -│ │ ├── PdfUploader.tsx -│ │ ├── PdfViewer.tsx -│ │ ├── Sidebar.tsx -│ │ └── Spinner.tsx -│ ├── favicon.ico -│ ├── globals.css -│ ├── layout.js -│ ├── page.js -│ ├── styles -│ │ ├── output.css -│ │ └── styles.css -│ └── utils -│ ├── highlightStorage.ts -│ └── pdfUtils.ts -├── public -│ ├── sample.pdf -│ └── ... -├── scripts -│ └── comment-file-path.sh -├── README.md -├── screenshot.png -└── ... 
-``` - - `app/page.js`: Main entry point of the application - `app/components/`: React components for various parts of the application - `app/utils/`: Utility functions for PDF processing and highlight storage - `app/styles/`: CSS files for styling +- `app/api/`: API routes for handling highlight operations ## Key Components @@ -78,24 +48,26 @@ This project is a PDF viewer and keyword search application developed as part of - `KeywordSearch.tsx`: Manages keyword search functionality - `HighlightPopup.tsx`: Displays information about highlighted text - `Sidebar.tsx`: Shows search results and navigation options +- `highlightStorage.ts`: Manages highlight storage operations +- `sqliteUtils.ts`: Handles SQLite database operations -## Screenshot +## Features -![Application Screenshot](./screenshot.png) +- Has a highlight storage system supporting both SQLite and Supabase +- API routes for creating, retrieving, updating, and deleting highlights +- User authentication and document permissions (currently disabled) +- Export/import as JSON functionality for highlights +- Scroll the sidebar highlighted area into view across different PDFs. -*The screenshot above shows the main interface of the PDF viewer application, including the document display, search functionality, and sidebar.* ## Future Improvements - Implement annotation tools (e.g., freehand drawing, text notes) -- Add support for multiple document comparison +- Add support for multiple document search +- Pre-process batch PDFs for quicker highlights - Enhance mobile responsiveness for better small-screen experience -- Implement user authentication and document permissions - Optimize performance for large PDF files - -## Contributing - -Contributions, issues, and feature requests are welcome. Feel free to check [issues page](https://github.com/yourusername/your-repo-name/issues) if you want to contribute. +- Upload the PDF into the database. 
## License @@ -103,6 +75,8 @@ Contributions, issues, and feature requests are welcome. Feel free to check [iss ## Acknowledgements +- [Next.js](https://nextjs.org/) for the React framework +- [SQLite](https://www.sqlite.org/) for local database storage +- [Supabase](https://supabase.io/) for cloud database capabilities - [react-pdf](https://github.com/wojtekmaj/react-pdf) for PDF rendering capabilities - [Tailwind CSS](https://tailwindcss.com/) for utility-first CSS framework -- [Next.js](https://nextjs.org/) for the React framework diff --git a/app/api/auth/[...nextauth]/route.ts b/app/api/auth/[...nextauth]/route.ts new file mode 100644 index 0000000..ab35def --- /dev/null +++ b/app/api/auth/[...nextauth]/route.ts @@ -0,0 +1,2 @@ +// app/api/auth/[...nextauth]/route.ts +export { GET, POST } from "../../../utils/auth"; diff --git a/app/api/highlight/get/route.ts b/app/api/highlight/get/route.ts new file mode 100644 index 0000000..9f77652 --- /dev/null +++ b/app/api/highlight/get/route.ts @@ -0,0 +1,53 @@ +// app/api/highlight/get/route.ts +import HighlightStorage from "../../../utils/highlightStorage"; +import { storageMethod } from "../../../utils/env"; +import { StorageMethod } from "../../../utils/types"; +import { getHighlightsForPdf as supabaseGetHighlightsForPdf } from "../../../utils/supabase"; + +/** + * Loads the highlights stored for `body.pdfId` and returns them as JSON (200); + * any failure is logged and reported as a JSON error payload (500). + */ +async function handleRequest(req: Request): Promise<Response> { + let db: HighlightStorage | undefined; + try { + const body = await req.json(); + let highlights; + + if (storageMethod === StorageMethod.sqlite) { + db = new HighlightStorage(); + highlights = await db.getHighlightsForPdf(body.pdfId); + } else { + highlights = await supabaseGetHighlightsForPdf(body.pdfId); + } + + return new Response(JSON.stringify(highlights), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + } catch (error) { + console.error("Error in handleRequest:", error); + return new Response( + JSON.stringify({ + error: "Internal Server Error", + details: error instanceof Error ? error.message : String(error), + }), 
+ { + status: 500, + headers: { "Content-Type": "application/json" }, + } + ); + } finally { + if (db) { + try { + await db.close(); + } catch (closeError) { + console.error("Error closing database:", closeError); + } + } + } +} + +export async function POST(req: Request): Promise<Response> { + return handleRequest(req); +} diff --git a/app/api/highlight/update/route.ts b/app/api/highlight/update/route.ts new file mode 100644 index 0000000..c2e9bc6 --- /dev/null +++ b/app/api/highlight/update/route.ts @@ -0,0 +1,88 @@ +// app/api/highlight/update/route.ts + +import HighlightStorage from "../../../utils/highlightStorage"; +import { storageMethod } from "../../../utils/env"; +import { + deleteHighlight as supabaseDeleteHighlight, + saveBulkHighlights as supabaseSaveBulkHighlights, + saveHighlight as supabaseSaveHighlight, +} from "../../../utils/supabase"; +import { StorageMethod, StoredHighlight } from "../../../utils/types"; + +/** + * Parses the JSON request body, runs `action` on it (passing a HighlightStorage + * handle when the sqlite backend is selected) and answers with an empty 200, + * or an empty 500 if anything throws. + */ +async function handleRequest( + req: Request, + action: (body: any, db?: HighlightStorage) => Promise<void> +): Promise<Response> { + let db: HighlightStorage | undefined; + try { + const body = await req.json(); + if (storageMethod === StorageMethod.sqlite) { + db = new HighlightStorage(); + } + await action(body, db); + return new Response(null, { status: 200 }); + } catch (error) { + console.error(error); + return new Response(null, { status: 500 }); + } finally { + if (db) { + try { + // A failing close must not replace the response chosen above. + await db.close(); + } catch (closeError) { + console.error("Error closing database:", closeError); + } + } + } +} + +async function saveHighlights(body: any, db?: HighlightStorage): Promise<void> { + if (db) { + if (Array.isArray(body.highlights)) { + await db.saveBulkHighlights(ensureKeywords(body.highlights)); + } else { + await db.saveHighlight(ensureKeyword(body.highlights)); + } + } else { + if (Array.isArray(body)) { + await supabaseSaveBulkHighlights(ensureKeywords(body)); + } else { + await supabaseSaveHighlight(ensureKeyword(body)); + } + } +} + +async function removeHighlight( + body: any, + db?: HighlightStorage +): Promise<void> { + if (db) { + await 
db.deleteHighlight(body.pdfId, body.id); + } else { + await supabaseDeleteHighlight(body); + } +} + +function ensureKeyword(highlight: StoredHighlight): StoredHighlight { + return { + ...highlight, + keyword: highlight.keyword || "", + }; +} + +function ensureKeywords(highlights: StoredHighlight[]): StoredHighlight[] { + return highlights.map(ensureKeyword); +} + +export async function POST(req: Request): Promise<Response> { + return handleRequest(req, saveHighlights); +} + +export async function DELETE(req: Request): Promise<Response> { + return handleRequest(req, removeHighlight); +} diff --git a/app/api/index/route.ts b/app/api/index/route.ts new file mode 100644 index 0000000..de154fd --- /dev/null +++ b/app/api/index/route.ts @@ -0,0 +1,34 @@ +// app/api/index/route.ts +import { storageMethod } from "../../utils/env"; +import HighlightStorage from "../../utils/highlightStorage"; +import { StorageMethod } from "../../utils/types"; + +/** + * Indexes `body.words` for `body.pdfId` in the sqlite backend; responds with an + * empty 200 on success and an empty 500 on any failure (including non-sqlite storage). + */ +export async function POST(req: Request): Promise<Response> { + let db: HighlightStorage | undefined; + try { + const body = await req.json(); + if (storageMethod === StorageMethod.sqlite) { + db = new HighlightStorage(body.pdfId); + await db.indexWords(body.pdfId, body.words); + } else { + throw new Error("Index via supabase has not been implemented"); + } + return new Response(null, { status: 200 }); + } catch (error) { + console.log(error); + return new Response(null, { status: 500 }); + } finally { + // Cleanup only: returning from `finally` would override the try/catch result. + if (db) { + try { + await db.close(); + } catch (closeError) { + console.error("Error closing database:", closeError); + } + } + } +} diff --git a/app/components/App.tsx b/app/components/App.tsx index 464ba72..730f94b 100644 --- a/app/components/App.tsx +++ b/app/components/App.tsx @@ -1,30 +1,155 @@ // app/components/App.tsx "use client"; -import React, { useState, useEffect, useRef } from "react"; +import React, { useCallback, useState, useEffect, useRef } from "react"; import PdfUploader from "./PdfUploader"; import KeywordSearch from "./KeywordSearch"; import PdfViewer from "./PdfViewer"; -import { searchPdf } from "../utils/pdfUtils"; +import 
{ Header } from "./Header"; +import Spinner from "./Spinner"; +import { convertPdfToImages, searchPdf } from "../utils/pdfUtils"; import type { IHighlight } from "react-pdf-highlighter"; +import HighlightUploader from "./HighlightUploader"; +import { StoredHighlight, StorageMethod } from "../utils/types"; +import { + IHighlightToStoredHighlight, + StoredHighlightToIHighlight, +} from "../utils/utils"; +import { createWorker } from "tesseract.js"; +// import { useSession } from "next-auth/react"; +import { getPdfId } from "../utils/pdfUtils"; +import { storageMethod } from "../utils/env"; export default function App() { const [pdfUploaded, setPdfUploaded] = useState(false); const [searchTerm, setSearchTerm] = useState(""); const [pdfUrl, setPdfUrl] = useState(null); + const [pdfOcrUrl, setPdfOcrUrl] = useState(null); + const [pdfName, setPdfName] = useState(null); + const [pdfId, setPdfId] = useState(null); + const [highlightUrl, setHighlightUrl] = useState(null); const [highlights, setHighlights] = useState>([]); const [highlightsKey, setHighlightsKey] = useState(0); + const [loading, setLoading] = useState(false); const pdfViewerRef = useRef(null); + // const session = useSession(); useEffect(() => { setHighlightsKey((prev) => prev + 1); }, [highlights]); - const handleFileUpload = (file: File) => { - const fileUrl = URL.createObjectURL(file); + const handleFileUpload = async (file: File) => { + setLoading(true); + let fileUrl = URL.createObjectURL(file); + const pdfId = getPdfId( + file.name, + /* session.data?.user?.email ?? 
*/ undefined + ); + // Creating a searchable PDF: + // Convert uploaded PDF file to b64 image, + // perform OCR, + // convert output back to PDF + // update file url with new PDF url + const i = await convertPdfToImages(file); + const worker = await createWorker("eng"); + const res = await worker.recognize( + i[0], + { pdfTitle: "ocr-out" }, + { pdf: true } + ); + const pdf = res.data.pdf; + if (pdf) { + // Update file url if OCR success + const blob = new Blob([new Uint8Array(pdf)], { type: "application/pdf" }); + const fileOcrUrl = URL.createObjectURL(blob); + setPdfOcrUrl(fileOcrUrl); + + // Index words + // const data = res.data.words; + // const words = data.map(({ text, bbox: { x0, y0, x1, y1 } }) => { + // return { + // keyword: text, + // x1: x0, + // y1: y0, + // x2: x1, + // y2: y1, + // }; + // }); + // await fetch("/api/index", { + // method: "POST", + // headers: { "Content-Type": "application/json" }, + // body: JSON.stringify({ + // pdfId, + // words, + // }), + // }); + } setPdfUrl(fileUrl); setPdfUploaded(true); + setPdfName(file.name); + setPdfId(pdfId); + setLoading(false); }; + useEffect(() => { + const getHighlights = async () => { + if (!pdfName || !pdfId) { + return; + } + const res = await fetch("/api/highlight/get", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ pdfId }), // the get route handler reads body.pdfId + }); + if (res.ok) { + const resHighlights = await res.json(); + console.log("getHighlights", pdfId, resHighlights); + if (resHighlights) { + const highlights = resHighlights.map( + (storedHighlight: StoredHighlight) => { + return StoredHighlightToIHighlight(storedHighlight); + } + ); + setHighlights(highlights); + } + } + }; + getHighlights(); + }, [pdfName, pdfId]); + + const handleHighlightUpload = (file: File) => { + const fileUrl = URL.createObjectURL(file); + setHighlightUrl(fileUrl); + }; + + useEffect(() => { + const setHighlightsFromFile = async () => { + if (!highlightUrl || !pdfUploaded) { + return; + } + const res = await 
fetch(highlightUrl); + if (res.ok) { + const data = await res.json(); + const highlights = data.map((highlight: StoredHighlight) => + StoredHighlightToIHighlight(highlight) + ); + setHighlights(highlights); + const body = + storageMethod === StorageMethod.sqlite + ? { + pdfId, + highlights: data, + } + : data; + await fetch("/api/highlight/update", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + } + }; + setHighlightsFromFile(); + }, [highlightUrl, pdfUploaded, pdfId]); + const resetHighlights = () => { setHighlights([]); }; @@ -32,7 +157,7 @@ export default function App() { const handleSearch = async () => { if (pdfUrl && searchTerm) { const keywords = searchTerm.split("|"); - let currentZoom = 1.45; + let currentZoom = 1; if (pdfViewerRef.current) { if ("scale" in pdfViewerRef.current) { @@ -51,35 +176,123 @@ export default function App() { console.log("Current zoom level:", currentZoom); - const newHighlights = await searchPdf(keywords, pdfUrl, currentZoom); + let newHighlights = await searchPdf(keywords, pdfUrl, currentZoom); + if (newHighlights.length === 0 && pdfOcrUrl) { + // Try searching the OCR pdf + // This step is sometimes required due to the OCR process + // possibly being lossy (pdf -> png -> pdf) + // which means some words are missing/malformed + newHighlights = await searchPdf(keywords, pdfOcrUrl, currentZoom); + } + console.log("newHighlights:", JSON.stringify(newHighlights, null, 2)); const updatedHighlights = [...highlights, ...newHighlights]; + + if (pdfName && pdfId) { + const storedHighlights = updatedHighlights.map((highlight) => + IHighlightToStoredHighlight(highlight, pdfId) + ); + const body = + storageMethod === StorageMethod.sqlite + ? 
{ + pdfId, + highlights: storedHighlights, + } + : storedHighlights; + await fetch("/api/highlight/update", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + } + setHighlights(updatedHighlights); } }; + const parseIdFromHash = () => { + return document.location.hash.slice("#highlight-".length); + }; + + const resetHash = () => { + document.location.hash = ""; + }; + + const scrollViewerTo = useRef((highlight: IHighlight) => { + if (pdfViewerRef.current && highlight) { + pdfViewerRef.current.scrollTo(highlight); + } + }); + + const scrollToHighlightFromHash = useCallback(() => { + const highlightId = parseIdFromHash(); + const highlight = highlights.find((h) => h.id === highlightId); + if (highlight) { + scrollViewerTo.current(highlight); + } + }, [highlights]); + + useEffect(() => { + window.addEventListener("hashchange", scrollToHighlightFromHash, false); + return () => { + window.removeEventListener( + "hashchange", + scrollToHighlightFromHash, + false + ); + }; + }, [scrollToHighlightFromHash]); + return ( -
-
-
- - - +
+
+
+
+
+ +
+
+ + { + /* session.status === "authenticated" && */ pdfId && ( + + ) + } + {pdfUrl && ( + + )} +
+ {loading ? ( +
+ +
+ ) : ( + + )}
diff --git a/app/components/Button.tsx b/app/components/Button.tsx index e647286..0eb8777 100644 --- a/app/components/Button.tsx +++ b/app/components/Button.tsx @@ -15,11 +15,11 @@ const Button: React.FC = ({ ...props }) => { const baseStyles = - "inline-flex items-center justify-center rounded-md focus:outline-none focus:ring-2 focus:ring-offset-2"; + "inline-flex items-center justify-center rounded-md focus:outline-none focus:ring-2 focus:ring-offset-2 cursor-pointer disabled:cursor-not-allowed"; const variantStyles = { - default: "bg-blue-500 text-white hover:bg-blue-600", - outline: "border border-gray-300 text-gray-700 hover:bg-gray-100", - ghost: "bg-transparent text-gray-700 hover:bg-gray-100", + default: "bg-blue-500 text-white enabled:hover:bg-blue-600", + outline: "border border-gray-300 text-gray-700 enabled:hover:bg-gray-100", + ghost: "bg-transparent text-gray-700 enabled:hover:bg-gray-100", }; const sizeStyles = { default: "px-4 py-2 text-sm", diff --git a/app/components/Header.tsx b/app/components/Header.tsx new file mode 100644 index 0000000..b413127 --- /dev/null +++ b/app/components/Header.tsx @@ -0,0 +1,29 @@ +// app/components/Header.tsx +import Link from "next/link"; +import { useSession } from "next-auth/react"; + +interface HeaderProps {} + +const Header: React.FC = ({}: HeaderProps) => { + // const session = useSession(); + return ( +
+
+

+ Adanomad Challenge +

+
+ {/*
+
+ + {`${session.status === "authenticated" ? "Sign out" : "Sign in with Google"}`} + +
+
*/} +
+ ); +}; + +export { Header }; diff --git a/app/components/HighlightUploader.tsx b/app/components/HighlightUploader.tsx new file mode 100644 index 0000000..eb942f1 --- /dev/null +++ b/app/components/HighlightUploader.tsx @@ -0,0 +1,68 @@ +// app/components/HighlightUploader.tsx +import React from "react"; +import { Button } from "./Button"; +import { Input } from "./Input"; +import { Download, Upload } from "lucide-react"; +import { IHighlight } from "react-pdf-highlighter"; +import { IHighlightToStoredHighlight } from "../utils/utils"; + +interface HighlightUploader { + onFileUpload: (file: File) => void; + highlights: IHighlight[]; + pdfId: string; +} + +const HighlightUploader: React.FC = ({ + onFileUpload, + highlights, + pdfId, +}) => { + const handleFileUpload = (event: React.ChangeEvent) => { + if (event.target.files && event.target.files[0]) { + onFileUpload(event.target.files[0]); + } + }; + + return ( + + ); +}; + +export default HighlightUploader; diff --git a/app/components/PdfUploader.tsx b/app/components/PdfUploader.tsx index 058f618..9837fb6 100644 --- a/app/components/PdfUploader.tsx +++ b/app/components/PdfUploader.tsx @@ -1,8 +1,8 @@ -// app/components/PdfUploader.tsx import React from "react"; import { Button } from "./Button"; import { Input } from "./Input"; import { Upload } from "lucide-react"; +import { supabase } from '../utils/supabase'; //Import Supabase client interface PdfUploaderProps { onFileUpload: (file: File) => void; @@ -13,9 +13,28 @@ const PdfUploader: React.FC = ({ onFileUpload, pdfUploaded, }) => { - const handleFileUpload = (event: React.ChangeEvent) => { + const handleFileUpload = async (event: React.ChangeEvent) => { if (event.target.files && event.target.files[0]) { - onFileUpload(event.target.files[0]); + const file = event.target.files[0]; + console.log('Selected file:', file); //Log selected file for confirmation + + try { + //Upload the PDF to Supabase storage + const { data, error } = await supabase.storage + 
.from('pdf-bucket') + .upload(`documents/${file.name}`, file); + + if (error) throw new Error(`Failed to upload PDF: ${error.message}`); + + console.log('PDF uploaded successfully:', data); + onFileUpload(file); + } catch (err: unknown) { + if (err instanceof Error) { + console.error('Error uploading the PDF:', err.message); + } else { + console.error('Unknown error occurred during PDF upload'); + } + } } }; @@ -23,9 +42,9 @@ const PdfUploader: React.FC = ({