Added: downloading and returning Wikipedia articles
@@ -1,7 +1,6 @@
 package wikipediadl
 
 import (
-	"fmt"
 	"log"
 	"strings"
 
@@ -13,12 +12,17 @@ const (
 	DumpUrl = "https://dumps.wikimedia.org/plwiki/latest/"
 )
 
+func FetchArticleBundles() ([]string, error) {
+	scraper := getScraper()
+
+	articles := getAllArticles(scraper)
+	return articles, nil
+}
+
 func getScraper() *colly.Collector {
-	s := colly.NewCollector(
+	return colly.NewCollector(
 		colly.AllowedDomains(DumpDomain),
 	)
-
-	return s
 }
 
 func getAllArticles(s *colly.Collector) []string {
@@ -58,14 +62,3 @@ func isValidArticle(a string) bool {
 	articleIndex := article[0]
 	return articleIndex >= 48 && articleIndex <= 57
 }
-
-func FetchArticles() error {
-	scraper := getScraper()
-
-	articles := getAllArticles(scraper)
-	for _, a := range articles {
-		fmt.Println(a)
-	}
-
-	return nil
-}
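Note on the scraping side: getAllArticles is called by the new FetchArticleBundles but its body sits outside the hunks above, and the visible context lines only show isValidArticle keeping entries whose first byte falls in the ASCII range 48-57, i.e. '0' through '9'. A minimal sketch of what such a helper could look like with colly, assuming it collects anchor hrefs from the dump index at DumpUrl and filters them through isValidArticle (the selector and the filtering step are assumptions, not part of this commit):

// Hypothetical sketch only; the actual getAllArticles is not shown in this diff.
func getAllArticles(s *colly.Collector) []string {
	var bundles []string

	// Assumed behaviour: collect every link on the dump index page
	// whose name passes isValidArticle.
	s.OnHTML("a[href]", func(e *colly.HTMLElement) {
		href := e.Attr("href")
		if isValidArticle(href) {
			bundles = append(bundles, href)
		}
	})

	if err := s.Visit(DumpUrl); err != nil {
		log.Println(err)
	}

	return bundles
}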
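With FetchArticles removed, callers that previously relied on the printed output now receive the bundle names directly from FetchArticleBundles. A rough usage example of the new API (the main package and the import path are assumed for illustration):

package main

import (
	"fmt"
	"log"

	"example.com/yourmodule/wikipediadl" // assumed import path, not shown in the diff
)

func main() {
	bundles, err := wikipediadl.FetchArticleBundles()
	if err != nil {
		log.Fatal(err)
	}

	// The caller now decides what to do with the list; printing it
	// reproduces what the removed FetchArticles used to do.
	for _, b := range bundles {
		fmt.Println(b)
	}
}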