Added: downloading and returning Wikipedia articles

This commit is contained in:
Oliwier Adamczyk
2025-10-04 23:14:41 +02:00
parent f542f01b49
commit 6df63dc4c1
26 changed files with 636 additions and 100 deletions

View File

@@ -1,7 +1,6 @@
package wikipediadl
import (
"fmt"
"log"
"strings"
@@ -13,12 +12,17 @@ const (
DumpUrl = "https://dumps.wikimedia.org/plwiki/latest/"
)
func FetchArticleBundles() ([]string, error) {
scraper := getScraper()
articles := getAllArticles(scraper)
return articles, nil
}
// getScraper returns a colly collector configured with
// colly.AllowedDomains(DumpDomain).
// NOTE(review): this span appears to interleave the removed lines
// (`s := colly.NewCollector(` / `return s`) with the added
// `return colly.NewCollector(` line of the diff — confirm against the
// committed file which variant is current.
func getScraper() *colly.Collector {
s := colly.NewCollector(
return colly.NewCollector(
colly.AllowedDomains(DumpDomain),
)
return s
}
func getAllArticles(s *colly.Collector) []string {
@@ -58,14 +62,3 @@ func isValidArticle(a string) bool {
articleIndex := article[0]
return articleIndex >= 48 && articleIndex <= 57
}
// FetchArticles writes every entry returned by getAllArticles to standard
// output, one entry per line, using a collector obtained from getScraper.
// It always returns nil.
func FetchArticles() error {
	collector := getScraper()
	for _, name := range getAllArticles(collector) {
		fmt.Println(name)
	}
	return nil
}