Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kiwix search implementation #676

Merged
merged 3 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions backend/seeder/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func seedTestData(db *gorm.DB) {
}
}
kiwix := models.OpenContentProvider{
Url: "https://library.kiwix.org",
Url: "https://kiwix.staging.unlockedlabs.xyz",
Title: models.Kiwix,
ThumbnailUrl: "https://images.fineartamerica.com/images/artworkimages/mediumlarge/3/llamas-wearing-party-hats-in-a-circle-looking-down-john-daniels.jpg",
CurrentlyEnabled: true,
Expand All @@ -109,17 +109,17 @@ func seedTestData(db *gorm.DB) {
Language: models.StringPtr("eng,spa,ara"),
Description: models.StringPtr("A collection of TED videos about ted connects"),
Url: "/content/ted_mul_ted-connects_2024-08",
ThumbnailUrl: models.StringPtr("/catalog/v2/illustration/67440563-a62b-fabe-415c-4c3ee4546f78/?size=48"),
ThumbnailUrl: models.StringPtr("/kiwix.jpg"),
VisibilityStatus: true,
},
{
OpenContentProviderID: kiwix.ID,
ExternalID: models.StringPtr("urn:uuid:84812c13-fa65-feb7-c206-4f22cc2e0f9a"),
ExternalID: models.StringPtr("urn:uuid:93321718-5228-676d-7e95-14bbe88fa38c"),
Title: "Python Documentation",
Language: models.StringPtr("eng"),
Description: models.StringPtr("All documentation for Python"),
Url: "/content/docs.python.org_en_2024-09",
ThumbnailUrl: models.StringPtr("/catalog/v2/illustration/84812c13-fa65-feb7-c206-4f22cc2e0f9a/?size=48"),
Url: "/content/docs.python.org_en_2025-01",
ThumbnailUrl: models.StringPtr("/kiwix.jpg"),
VisibilityStatus: true,
},
{
Expand All @@ -129,7 +129,7 @@ func seedTestData(db *gorm.DB) {
Language: models.StringPtr("eng"),
Description: models.StringPtr("The Canadian financial wiki"),
Url: "/content/finiki_en_all_maxi_2024-06",
ThumbnailUrl: models.StringPtr("/catalog/v2/illustration/19e6fe12-09a9-0a38-5be4-71c0eba0a72d/?size=48"),
ThumbnailUrl: models.StringPtr("/kiwix.jpg"),
VisibilityStatus: true,
}}
for idx := range kiwixLibraries {
Expand Down
31 changes: 27 additions & 4 deletions backend/src/database/libraries.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,16 @@ type LibraryResponse struct {
IsFavorited bool `json:"is_favorited"`
}

func (db *DB) GetAllLibraries(page, perPage, days int, userId, facilityId uint, visibility, orderBy, search string, isAdmin bool) (int64, []LibraryResponse, error) {
// GetAllLibraries retrieves either a paginated list of libraries or all libraries, depending on the given parameters.
// page - the page number for pagination
// perPage - the number of libraries to display per page
// userId - the ID of the user the libraries are displayed for
// facilityId - the ID of the facility where the libraries were favorited
// visibility - one of featured, visible, hidden, or all
// orderBy - the order in which the results are returned
// isAdmin - whether the user is an administrator; used to determine how featured libraries are retrieved
// all - whether to return all libraries without pagination
func (db *DB) GetAllLibraries(page, perPage, days int, userId, facilityId uint, visibility, orderBy, search string, isAdmin, all bool) (int64, []LibraryResponse, error) {
var (
total int64
criteria string
Expand Down Expand Up @@ -81,14 +90,15 @@ func (db *DB) GetAllLibraries(page, perPage, days int, userId, facilityId uint,
AND f.open_content_provider_id = libraries.open_content_provider_id`)
}
tx = tx.Group("libraries.id").Order("favorite_count DESC")

default:
tx = tx.Order(orderBy)
}
if err := tx.Limit(perPage).Offset(calcOffset(page, perPage)).Find(&libraries).Error; err != nil {
if !all {
tx = tx.Limit(perPage).Offset(calcOffset(page, perPage))
}
if err := tx.Find(&libraries).Error; err != nil {
return 0, nil, newGetRecordsDBError(err, "libraries")
}

return total, libraries, nil
}

Expand All @@ -101,6 +111,19 @@ func (db *DB) GetLibraryByID(id int) (*models.Library, error) {
return &library, nil
}

// GetLibrariesByIDs loads the libraries whose primary keys are in ids, with
// their OpenContentProvider preloaded.
//
// When more than one ID is supplied, the result is additionally restricted to
// rows whose language column is exactly 'eng'.
// NOTE(review): libraries tagged with several languages (e.g. "eng,spa,ara")
// do not satisfy that equality filter — confirm this is intended.
//
// A query failure is logged and returned wrapped by newNotFoundDBError.
func (db *DB) GetLibrariesByIDs(ids []int) ([]models.Library, error) {
	var libraries []models.Library
	tx := db.Preload("OpenContentProvider").Where("id in ?", ids)
	if len(ids) > 1 {
		// Chain methods return the *gorm.DB that carries the new condition;
		// keep the returned handle instead of relying on in-place mutation.
		tx = tx.Where("language = 'eng'")
	}
	if err := tx.Find(&libraries).Error; err != nil {
		log.Errorln("unable to find libraries with these IDs")
		return nil, newNotFoundDBError(err, "libraries")
	}
	return libraries, nil
}

func (db *DB) ToggleVisibilityAndRetrieveLibrary(id int) (*models.Library, error) {
var library models.Library
if err := db.Preload("OpenContentProvider").Find(&library, "id = ?", id).Error; err != nil {
Expand Down
80 changes: 79 additions & 1 deletion backend/src/handlers/libraries_handler.go
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question to consider- right now, we are only able to search through the libraries that are visible due to incorporating with this api. this should be the case for students, but should admin be able to search through all libraries, even those that are hidden? I would think yes (it might help them be able to search to find the library they want to show)
Screenshot 2025-01-27 at 4 11 43 PM
Screenshot 2025-01-27 at 4 11 58 PM

Searching inside of ubuntu, i can do this if i click directly on the library or on the search on that library, but it does not appear in the dropdown.
Screenshot 2025-01-27 at 4 12 22 PM

if you pass in all, you'll have to bypass showHidden also (might happen in database file, not handler file)

Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,41 @@ package handlers
import (
"UnlockEdv2/src/models"
"encoding/json"
"encoding/xml"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"path"
"strconv"
"strings"
)

// registerLibraryRoutes returns the route definitions for the library
// endpoints. Every route is registered with the OpenContentAccess feature.
// NOTE(review): the boolean in each routeDef is presumably the admin-only
// flag (only the visibility toggle sets it) — confirm against routeDef.
func (srv *Server) registerLibraryRoutes() []routeDef {
	openContent := models.Feature(models.OpenContentAccess)
	routes := []routeDef{
		{"GET /api/libraries", srv.handleIndexLibraries, false, openContent},
		{"GET /api/libraries/search", srv.handleSearchLibraries, false, openContent},
		{"GET /api/libraries/{id}", srv.handleGetLibrary, false, openContent},
		{"PUT /api/libraries/{id}/toggle", srv.handleToggleLibraryVisibility, true, openContent},
		{"PUT /api/libraries/{id}/favorite", srv.handleToggleFavoriteLibrary, false, openContent},
	}
	return routes
}

// Retrieves either a paginated list of libraries or all libraries based upon the HTTP request parameters.
// Query Parameters:
// page - the page number for pagination
// perPage - the number of libraries to display on page
// search - the title or partial title of the libraries to search for
// order_by - (title|created_at|most_popular) the order in which the results are returned
// visibility - can either be featured, visible, hidden, or all
// all - true or false on whether or not to return all libraries without pagination
func (srv *Server) handleIndexLibraries(w http.ResponseWriter, r *http.Request, log sLog) error {
page, perPage := srv.getPaginationInfo(r)
search := r.URL.Query().Get("search")
orderBy := r.URL.Query().Get("order_by")
all := r.URL.Query().Get("all") == "true"
days, err := strconv.Atoi(r.URL.Query().Get("days"))
if err != nil {
days = -1
Expand All @@ -34,7 +51,7 @@ func (srv *Server) handleIndexLibraries(w http.ResponseWriter, r *http.Request,
showHidden = r.URL.Query().Get("visibility")
}
claims := r.Context().Value(ClaimsKey).(*Claims)
total, libraries, err := srv.Db.GetAllLibraries(page, perPage, days, claims.UserID, claims.FacilityID, showHidden, orderBy, search, claims.isAdmin())
total, libraries, err := srv.Db.GetAllLibraries(page, perPage, days, claims.UserID, claims.FacilityID, showHidden, orderBy, search, claims.isAdmin(), all)
if err != nil {
return newDatabaseServiceError(err)
}
Expand All @@ -55,6 +72,67 @@ func (srv *Server) handleGetLibrary(w http.ResponseWriter, r *http.Request, log
return writeJsonResponse(w, http.StatusOK, library)
}

// handleSearchLibraries forwards a full-text search to the Kiwix server and
// returns the matches as a paginated response holding a single KiwixChannel.
// Query Parameters:
// search - the pattern forwarded to Kiwix
// library_id - repeatable; the IDs of the libraries to search within
// Pagination values come from srv.getPaginationInfo.
//
// The Kiwix response is requested as XML, decoded into models.RSS, and mapped
// back onto the matching libraries by RSS.IntoKiwixChannel.
func (srv *Server) handleSearchLibraries(w http.ResponseWriter, r *http.Request, log sLog) error {
	page, perPage := srv.getPaginationInfo(r)
	query := r.URL.Query()
	search := query.Get("search")
	ids := query["library_id"]
	libraryIDs := make([]int, 0, len(ids))
	for _, id := range ids {
		// Best effort: values that are not valid integers are skipped.
		if libID, err := strconv.Atoi(id); err == nil {
			libraryIDs = append(libraryIDs, libID)
		}
	}
	libraries, err := srv.Db.GetLibrariesByIDs(libraryIDs)
	if err != nil {
		log.add("library_ids", libraryIDs)
		return newDatabaseServiceError(err)
	}

	// The start parameter is sent as a 1-based result index, not a page number.
	startIndex := (page-1)*perPage + 1
	queryParams := url.Values{}
	for _, library := range libraries {
		// Kiwix identifies a book by the last segment of the library URL.
		queryParams.Add("books.name", path.Base(library.Url))
	}
	queryParams.Add("format", "xml")
	queryParams.Add("pattern", search)
	kiwixSearchURL := fmt.Sprintf("%s/search?start=%d&pageLength=%d&%s", models.KiwixLibraryUrl, startIndex, perPage, queryParams.Encode())
	log.add("kiwix_search_url", kiwixSearchURL)

	// Bind the upstream call to the incoming request so it is cancelled when
	// the client disconnects or the request context is otherwise done.
	request, err := http.NewRequestWithContext(r.Context(), http.MethodGet, kiwixSearchURL, nil)
	if err != nil {
		return newInternalServerServiceError(err, "unable to create new request to kiwix")
	}
	resp, err := srv.Client.Do(request)
	if err != nil {
		return newInternalServerServiceError(err, "error executing kiwix search request")
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.add("status_code", resp.StatusCode)
		body, err := io.ReadAll(resp.Body)
		if err != nil {
			return newInternalServerServiceError(err, "executing request returned unexpected status, and failed to read error from its response")
		}
		log.add("kiwix_error", string(body))
		return newBadRequestServiceError(errors.New("api call to kiwix failed"), "response contained unexpected status code from kiwix")
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return newInternalServerServiceError(err, "error reading body of response")
	}
	var rss models.RSS
	if err := xml.Unmarshal(body, &rss); err != nil {
		return newInternalServerServiceError(err, "error parsing response body into XML")
	}
	// Strip any thousands separators from the total (e.g. "1,234") before parsing.
	total, err := strconv.ParseInt(strings.ReplaceAll(rss.Channel.TotalResults, ",", ""), 10, 64)
	if err != nil {
		return newInternalServerServiceError(err, "error parsing the total results value into an int64")
	}
	paginationData := models.NewPaginationInfo(page, perPage, total)
	// The paginated payload is a slice; a search yields exactly one channel.
	channels := []*models.KiwixChannel{rss.IntoKiwixChannel(libraries)}
	return writePaginatedResponse(w, http.StatusOK, channels, paginationData)
}

func (srv *Server) handleToggleLibraryVisibility(w http.ResponseWriter, r *http.Request, log sLog) error {
id, err := strconv.Atoi(r.PathValue("id"))
if err != nil {
Expand Down
125 changes: 125 additions & 0 deletions backend/src/models/library.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
package models

import (
"encoding/xml"
"fmt"
"strings"
)

type Library struct {
DatabaseFields
OpenContentProviderID uint `gorm:"not null" json:"open_content_provider_id"`
Expand Down Expand Up @@ -35,3 +41,122 @@ type LibraryProxyPO struct {
BaseUrl string
VisibilityStatus bool
}

// Kiwix XML START here...
// RSS is the root <rss> element of the XML document returned by a Kiwix search.
type RSS struct {
XMLName xml.Name `xml:"rss"`
// Version is the value of the version attribute on the <rss> element.
Version string `xml:"version,attr"`
// Channel holds the search metadata and the result items.
Channel Channel `xml:"channel"`
}

// Channel is the <channel> element of a Kiwix search response. The paging
// fields live in the OpenSearch 1.1 XML namespace and are kept as the raw
// strings found in the document.
type Channel struct {
Title string `xml:"title"`
Description string `xml:"description"`
TotalResults string `xml:"http://a9.com/-/spec/opensearch/1.1/ totalResults"`
StartIndex string `xml:"http://a9.com/-/spec/opensearch/1.1/ startIndex"`
ItemsPerPage string `xml:"http://a9.com/-/spec/opensearch/1.1/ itemsPerPage"`
// Items are the individual search hits (<item> elements).
Items []Item `xml:"item"`
}

// Item is a single search hit (<item>) inside a Channel.
type Item struct {
// Title is the title of the matching page.
Title string `xml:"title"`
// Link is the path of the matching page; it is matched against library URLs by prefix.
Link string `xml:"link"`
// Description is decoded by Description.UnmarshalXML rather than the default decoder.
Description Description `xml:"description"`
// Book describes the book (library) the hit belongs to.
Book Book `xml:"book"`
WordCount string `xml:"wordCount"`
}

// Description holds the text of an item's <description> element.
type Description struct {
// RawText is excluded from default XML decoding (tag "-"); it is filled in
// by the type's UnmarshalXML method.
RawText string `xml:"-"`
}

// Book is the <book> element of a search hit.
type Book struct {
// Title is the book's title.
Title string `xml:"title"`
}

// KiwixChannel is the JSON representation of a Kiwix search result set.
type KiwixChannel struct {
Title string `json:"title"`
// NOTE(review): Link is not populated by RSS.IntoKiwixChannel — confirm whether it is set elsewhere.
Link string `json:"link"`
Description string `json:"description"`
TotalResults string `json:"total_results"`
StartIndex string `json:"start_index"`
ItemsPerPage string `json:"items_per_page"`
// Items are the search hits converted into KiwixItem values.
Items []KiwixItem `json:"items"`
}

// KiwixItem is a single search hit: the embedded Library fields describe the
// library the hit belongs to, and PageTitle is the title of the matching page.
type KiwixItem struct {
Library
PageTitle string `json:"page_title"`
}

// IntoKiwixChannel converts the decoded RSS search response into a
// KiwixChannel, pairing every result item with the library whose Url is a
// prefix of the item's link.
//
// libraries is the set of candidate libraries the search ran against. Items
// whose link matches none of them are skipped; previously such an item caused
// a nil-pointer dereference. The returned channel always has a non-nil Items
// slice.
func (rss *RSS) IntoKiwixChannel(libraries []Library) *KiwixChannel {
	channel := &KiwixChannel{
		Title:        rss.Channel.Title,
		Description:  rss.Channel.Description,
		TotalResults: rss.Channel.TotalResults,
		StartIndex:   rss.Channel.StartIndex,
		ItemsPerPage: rss.Channel.ItemsPerPage,
		Items:        make([]KiwixItem, 0, len(rss.Channel.Items)),
	}
	for _, item := range rss.Channel.Items {
		library := getLibrary(libraries, item.Link)
		if library == nil {
			// No known library owns this link, so no proxy URL can be built.
			continue
		}
		// Copy the text so the stored pointer never aliases the loop variable.
		description := item.Description.RawText
		channel.Items = append(channel.Items, KiwixItem{
			Library: Library{
				DatabaseFields: DatabaseFields{
					ID: library.ID,
				},
				// Route the hit through the library proxy endpoint.
				Url:          fmt.Sprintf("/api/proxy/libraries/%d%s", library.ID, item.Link),
				ThumbnailUrl: library.ThumbnailUrl,
				Description:  &description,
				Title:        item.Book.Title,
			},
			PageTitle: item.Title,
		})
	}
	return channel
}

// getLibrary returns a pointer to a copy of the first library whose Url is a
// prefix of link, or nil when none of the libraries matches.
func getLibrary(libraries []Library, link string) *Library {
	for i := range libraries {
		if !strings.HasPrefix(link, libraries[i].Url) {
			continue
		}
		match := libraries[i] // hand back a copy, not a pointer into the slice
		return &match
	}
	return nil
}

// UnmarshalXML decodes a description element into d.RawText, preserving the
// <b>…</b> markers around bolded words.
//
// Character data is appended as-is. A <b> element is decoded to its text and
// re-wrapped in <b></b>. Any other nested element is re-emitted as a bare
// opening/closing tag (attributes are not written). Decoding stops at the
// first end tag whose local name equals that of start; token errors are
// returned unchanged.
func (d *Description) UnmarshalXML(dec *xml.Decoder, start xml.StartElement) error {
	var text strings.Builder
	for {
		token, err := dec.Token()
		if err != nil {
			return err
		}
		switch el := token.(type) {
		case xml.CharData:
			text.WriteString(string(el))
		case xml.EndElement:
			if el.Name.Local == start.Name.Local {
				// Closing tag of the description itself: store the result.
				d.RawText = text.String()
				return nil
			}
			text.WriteString("</" + el.Name.Local + ">")
		case xml.StartElement:
			if el.Name.Local != "b" {
				// Unexpected nested tag: keep its name so the markup stays balanced.
				text.WriteString("<" + el.Name.Local + ">")
				continue
			}
			// DecodeElement consumes through the matching </b>.
			var highlighted string
			if err := dec.DecodeElement(&highlighted, &el); err != nil {
				return err
			}
			text.WriteString("<b>" + highlighted + "</b>")
		}
	}
}
4 changes: 3 additions & 1 deletion backend/src/models/open_content.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package models

import (
"fmt"
"os"
"strings"
"time"

Expand Down Expand Up @@ -69,7 +70,6 @@ const (
KolibriDescription string = "Kolibri provides an extensive library of educational content suitable for all learning levels."
KiwixThumbnailURL string = "/kiwix.jpg"
KiwixDescription string = "Kiwix is an offline reader that allows you to host a wide array of educational content."
KiwixLibraryUrl string = "https://library.kiwix.org"
YoutubeThumbnail string = "/youtube.png"
Youtube string = "Youtube"
YoutubeApi string = "https://www.googleapis.com/youtube/v3/videos"
Expand All @@ -80,6 +80,8 @@ const (
HelpfulLinksDescription string = "Hand picked helpful links for users"
)

// KiwixLibraryUrl is the base URL of the Kiwix server, read once from the
// KIWIX_SERVER_URL environment variable when the package is initialized.
// NOTE(review): the value is an empty string when the variable is unset —
// confirm that callers building URLs from it handle that case.
var KiwixLibraryUrl string = os.Getenv("KIWIX_SERVER_URL")

func (cp *OpenContentProvider) BeforeCreate(tx *gorm.DB) error {
if cp.Title == Youtube && cp.Url == "" {
cp.Url = YoutubeApi
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ services:
- NATS_PASSWORD=dev
- IMG_FILEPATH=/imgs
- MIGRATION_DIR=backend/migrations
- KIWIX_SERVER_URL=https://kiwix.staging.unlockedlabs.xyz
depends_on:
kratos:
condition: service_started
Expand Down
Loading
Loading