diff --git a/README.md b/README.md index b961e29..b580800 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # ResearchScope -**CS Research Intelligence Platform — 83,000+ papers, scored, ranked, and searchable.** +**CS Research Intelligence Platform — 100,000+ papers, scored, ranked, and searchable.** Stop skimming paper lists. ResearchScope scores papers by impact, surfaces research gaps, recommends venues, and tracks who's driving the frontier — updated daily. @@ -40,7 +40,7 @@ The frontend is a static site on GitHub Pages backed by a **FastAPI REST API** o |---|---| | **Jun 2026** | **OpenReview Acceptance Tiers** — oral/spotlight/poster signals captured for ICLR, NeurIPS, ICML & COLM; oral/spotlight boost paper scores and show as badges. Coverage extended through ICLR 2026, NeurIPS 2025, ICML 2025 | | **Jun 2026** | **Journal Recommender** — paste title + abstract to match against 20 Q1 journals (JMLR, TPAMI, Nature MI, CSUR…) with impact factor, review timeline, and open access info | -| **Jun 2026** | **FastAPI Backend on Railway** — full REST API with JWT auth, favourites, PostgreSQL full-text search (83K+ papers). User accounts synced across devices | +| **Jun 2026** | **FastAPI Backend on Railway** — full REST API with JWT auth, favourites, PostgreSQL full-text search (100K+ papers). 
User accounts synced across devices | | **Jun 2026** | **OpenAlex Integration** — 250M+ work catalogue added as a data source, covering ML/NLP/CV/IR concept groups | | **Jun 2026** | **HuggingFace Training Dataset** — `kishormorol/researchscope-papers` auto-pushed after every pipeline run: raw metadata JSONL + instruction-tuning pairs | | **Jun 2026** | **20 Q1 Journals** — JMLR, TMLR, TACL, TPAMI, IJCV, AIJ, TNNLS, Nature MI, CSUR, TIP, MLJ, TKDE, DAMI, NN, PR, CL, IPM, JACM, NatComms, TOIS | @@ -54,11 +54,11 @@ The frontend is a static site on GitHub Pages backed by a **FastAPI REST API** o | Feature | Description | |---|---| -| 📄 **83K+ papers** | Scored by recency, venue rank, acceptance tier (oral/spotlight), novelty, author prestige, and citation quality | +| 📄 **100K+ papers** | Scored by recency, venue rank, acceptance tier (oral/spotlight), novelty, author prestige, and citation quality | | 🎓 **A* Conference coverage** | NeurIPS, ICML, ICLR, CVPR, ACL, EMNLP, AAAI, IJCAI, CHI, SIGIR, WWW, KDD and more | | 📖 **20 Q1 Journals** | JMLR, TMLR, TACL, TPAMI, Nature MI, and 15 more — with IF, review time, OA status | | 🎯 **Venue Recommenders** | Conference + Journal recommenders: paste abstract → ranked matches with expectations | -| 🔍 **Full-text search** | PostgreSQL `tsvector` search across 83K papers via Railway API | +| 🔍 **Full-text search** | PostgreSQL `tsvector` search across 100K+ papers via Railway API | | 👤 **User accounts** | JWT auth, favourites synced across devices via Railway backend | | 🕳 **Research gaps** | 3-layer extraction: explicit, pattern-detected, and starter ideas | | 👩🔬 **Author intelligence** | 5,000+ researchers ranked by momentum score | @@ -134,7 +134,7 @@ The paper dataset is published on HuggingFace and auto-updated after every pipel ```python from datasets import load_dataset -# 83K+ raw paper records (pretraining / RAG) +# 100K+ raw paper records (pretraining / RAG) papers = load_dataset("kishormorol/researchscope-papers", 
data_files="data/papers.jsonl", split="train") diff --git a/site/assets/js/app.js b/site/assets/js/app.js index 925c6d2..0380847 100644 --- a/site/assets/js/app.js +++ b/site/assets/js/app.js @@ -204,6 +204,12 @@ async function loadStats() { const el = document.getElementById(id); if (el) el.textContent = (val ?? 0).toLocaleString(); } + // Hero tagline count — rounded down to a clean "N,000+" so it never goes stale. + const heroEl = document.getElementById('hero-paper-count'); + if (heroEl && stats.total_papers) { + const rounded = Math.floor(stats.total_papers / 1000) * 1000; + heroEl.textContent = rounded.toLocaleString() + '+'; + } const genEl = document.getElementById('stat-generated'); if (genEl && stats.generated_at) { genEl.textContent = 'Updated ' + new Date(stats.generated_at).toLocaleDateString('en-US', { month:'short', day:'numeric', year:'numeric' }); diff --git a/site/assets/js/railway-api.js b/site/assets/js/railway-api.js index 9a70a68..7bb7a6e 100644 --- a/site/assets/js/railway-api.js +++ b/site/assets/js/railway-api.js @@ -11,6 +11,11 @@ const RS_API = 'https://researchscope-production.up.railway.app'; +// Public site shows at most this many papers per section (arXiv / conference / +// journal) — 3 000 total. The full corpus stays available via the API and the +// Hugging Face dataset; this just bounds what the browse pages paginate through. +const SECTION_CAP = 1000; + // ── Auth state ──────────────────────────────────────────────────────────────── const _auth = { @@ -67,25 +72,33 @@ async function _queryPapers({ else if (source === 'conference') params.set('source_type', 'conference'); else if (source === 'journal') params.set('source_type', 'journal'); - // 1. Try Railway + const start = (page - 1) * pageSize; + + // 1. Try Railway — clamp the reported count and trim rows past the cap so the + // browse page paginates through at most SECTION_CAP papers for this section. 
try { const json = await _apiFetch(`/papers?${params}`); - if (json && Array.isArray(json.results)) - return { data: json.results, count: json.total ?? 0, error: null }; + if (json && Array.isArray(json.results)) { + const count = Math.min(json.total ?? 0, SECTION_CAP); + let data = json.results; + if (start >= SECTION_CAP) data = []; + else if (start + data.length > SECTION_CAP) data = data.slice(0, SECTION_CAP - start); + return { data, count, error: null }; + } } catch (e) { console.warn('[railway] queryPapers failed, falling back to static JSON:', e.message); } - // 2. Last resort — static JSON + // 2. Last resort — static JSON (already capped at 1 000 by the generator) try { const res = await fetch('data/papers.json'); const all = await res.json(); - const start = (page - 1) * pageSize; const filtered = search ? all.filter(p => (p.title||'').toLowerCase().includes(search.toLowerCase()) || (p.abstract||'').toLowerCase().includes(search.toLowerCase())) : all; - return { data: filtered.slice(start, start + pageSize), count: filtered.length, error: null }; + const count = Math.min(filtered.length, SECTION_CAP); + return { data: filtered.slice(start, Math.min(start + pageSize, SECTION_CAP)), count, error: null }; } catch (e) { console.warn('[static] papers.json failed:', e.message); } diff --git a/site/index.html b/site/index.html index 94950cc..895d9f2 100644 --- a/site/index.html +++ b/site/index.html @@ -137,7 +137,7 @@
-83K+ papers scored by impact, novelty, and venue rank. Filter by topic, year, difficulty, or source.
+Papers scored by impact, novelty, and venue rank. Filter by topic, year, difficulty, or source.