Skip to content

Commit f8f43f8

Browse files
committed
[mod] simplify requests to hister && improve favicon handling && do not import already added urls - closes asciimoo#29
1 parent 4df0ea7 commit f8f43f8

4 files changed

Lines changed: 53 additions & 14 deletions

File tree

hister.go

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ import (
3030
"gopkg.in/yaml.v3"
3131
)
3232

33+
const Version = "v0.4.0"
34+
3335
var (
3436
cliErrorStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("9")).Bold(true)
3537
cliSuccessStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("10")).Bold(true)
@@ -39,15 +41,16 @@ var (
3941
)
4042

4143
var (
42-
cfgFile string
43-
cfg *config.Config
44+
cfgFile string
45+
cfg *config.Config
46+
UserAgent = fmt.Sprintf("Mozilla/5.0 (compatible; Hister/%s; +https://hister.org/)", Version)
4447
)
4548

4649
var rootCmd = &cobra.Command{
4750
Use: "hister",
4851
Short: "Web history on steroids",
4952
Long: ui.Banner,
50-
Version: "v0.4.0",
53+
Version: Version,
5154
//Run: func(_ *cobra.Command, _ []string) {
5255
//},
5356
}
@@ -129,7 +132,7 @@ var searchCmd = &cobra.Command{
129132
}
130133
qs := strings.Join(args, " ")
131134
client := &http.Client{Timeout: 5 * time.Second}
132-
req, err := http.NewRequest("GET", cfg.BaseURL("/search?q="+url.QueryEscape(qs)), nil)
135+
req, err := newHisterRequest("GET", "/search?q="+url.QueryEscape(qs), nil)
133136
if err != nil {
134137
exit(1, "Failed to create request: "+err.Error())
135138
}
@@ -184,11 +187,10 @@ var deleteCmd = &cobra.Command{
184187
"url": {u},
185188
}
186189
client := &http.Client{Timeout: 5 * time.Second}
187-
req, err := http.NewRequest("POST", cfg.BaseURL("/delete"), strings.NewReader(formData.Encode()))
190+
req, err := newHisterRequest("POST", "/delete", strings.NewReader(formData.Encode()))
188191
if err != nil {
189192
exit(1, "Failed to create request: "+err.Error())
190193
}
191-
req.Header.Set("Origin", "hister://")
192194
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
193195
resp, err := client.Do(req)
194196
if err != nil {
@@ -442,7 +444,7 @@ func indexURL(u string) error {
442444
log.Warn().Msg("URL must not be empty")
443445
return nil
444446
}
445-
req, err := http.NewRequest("GET", u, nil)
447+
req, err := newRequest("GET", u, nil)
446448
if err != nil {
447449
return errors.New(`failed to download file: ` + err.Error())
448450
}
@@ -473,7 +475,7 @@ func indexURL(u string) error {
473475
return errors.New(`failed to process document: ` + err.Error())
474476
}
475477
if d.Favicon == "" {
476-
err := d.DownloadFavicon()
478+
err := d.DownloadFavicon(UserAgent)
477479
if err != nil {
478480
log.Warn().Err(err).Str("URL", d.URL).Msg("failed to download favicon")
479481
}
@@ -483,11 +485,10 @@ func indexURL(u string) error {
483485
return errors.New(`failed to encode document to JSON: ` + err.Error())
484486
}
485487
histerClient := &http.Client{}
486-
req, err = http.NewRequest("POST", cfg.BaseURL("/add"), bytes.NewBuffer(dj))
488+
req, err = newHisterRequest("POST", "/add", bytes.NewBuffer(dj))
487489
if err != nil {
488490
return fmt.Errorf("failed to create request: %w", err)
489491
}
490-
req.Header.Set("Origin", "hister://")
491492
req.Header.Set("content-Type", "application/json")
492493
resp, err := histerClient.Do(req)
493494
if err != nil {
@@ -546,12 +547,31 @@ func importHistory(cmd *cobra.Command, args []string) {
546547
}
547548
defer rows.Close()
548549
i := 1
550+
client := &http.Client{}
549551
for rows.Next() {
550552
var u string
551553
err = rows.Scan(&u)
552554
if err != nil {
553555
exit(1, "Failed to retreive URL: "+err.Error())
554556
}
557+
if !strings.HasPrefix(u, "http://") && !strings.HasPrefix(u, "https://") {
558+
continue
559+
}
560+
req, err := newHisterRequest("GET", "/document?url="+url.QueryEscape(u), nil)
561+
if err != nil {
562+
log.Warn().Err(err).Str("URL", u).Msg("Failed to create request, skipping ")
563+
continue
564+
}
565+
resp, err := client.Do(req)
566+
if err != nil {
567+
log.Warn().Err(err).Str("URL", u).Msg("Failed to get info about URL, skipping")
568+
continue
569+
}
570+
resp.Body.Close()
571+
if resp.StatusCode == http.StatusOK {
572+
// skip already added URLs
573+
continue
574+
}
555575
fmt.Printf("[%d/%d] %s\n", i, count, u)
556576
if err := indexURL(u); err != nil {
557577
log.Warn().Err(err).Msg("Failed to index URL")
@@ -567,6 +587,24 @@ func importHistory(cmd *cobra.Command, args []string) {
567587
//q += fmt.Sprintf(" AND %s >= datetime('now', 'localtime', '-1 month')", vf)
568588
}
569589

590+
func newRequest(method, u string, payload io.Reader) (*http.Request, error) {
591+
req, err := http.NewRequest(method, u, payload)
592+
if err != nil {
593+
return req, err
594+
}
595+
req.Header.Set("User-Agent", UserAgent)
596+
return req, nil
597+
}
598+
599+
func newHisterRequest(method, u string, payload io.Reader) (*http.Request, error) {
600+
req, err := newRequest(method, cfg.BaseURL(u), payload)
601+
if err != nil {
602+
return req, err
603+
}
604+
req.Header.Set("Origin", "hister://")
605+
return req, nil
606+
}
607+
570608
func main() {
571609
rootCmd.Execute()
572610
}

server/api.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func init() {
8989
Description: "Get document by URL",
9090
Args: []*EndpointArg{
9191
&EndpointArg{
92-
Name: "URL",
92+
Name: "url",
9393
Type: "string",
9494
Required: true,
9595
Description: "URL of the document",

server/indexer/extractors.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,5 +126,6 @@ func (e *readabilityExtractor) Extract(d *Document) error {
126126
}
127127
d.Text = buf.String()
128128
d.Title = a.Title()
129+
d.faviconURL = a.Favicon()
129130
return nil
130131
}

server/indexer/indexer.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ func (d *Document) extractHTML() error {
349349
return Extract(d)
350350
}
351351

352-
func (d *Document) DownloadFavicon() error {
352+
func (d *Document) DownloadFavicon(userAgent string) error {
353353
if d.faviconURL == "" {
354354
d.faviconURL = fullURL(d.URL, "/favicon.ico")
355355
}
@@ -361,7 +361,7 @@ func (d *Document) DownloadFavicon() error {
361361
Timeout: 10 * time.Second,
362362
}
363363
req, err := http.NewRequest("GET", d.faviconURL, nil)
364-
req.Header.Set("User-Agent", "Hister")
364+
req.Header.Set("User-Agent", userAgent)
365365
if err != nil {
366366
return err
367367
}
@@ -372,7 +372,7 @@ func (d *Document) DownloadFavicon() error {
372372
defer resp.Body.Close()
373373

374374
if resp.StatusCode != http.StatusOK {
375-
return errors.New("invalid status code")
375+
return fmt.Errorf("invalid status code (%d)", resp.StatusCode)
376376
}
377377

378378
data, err := io.ReadAll(resp.Body)

0 commit comments

Comments
 (0)