-
Notifications
You must be signed in to change notification settings - Fork 9
/
tweets.go
104 lines (96 loc) · 2.17 KB
/
tweets.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// -*- tab-width: 4; -*-
package main
import (
"bufio"
"log"
"regexp"
"strings"
"time"
)
// Tweeter identifies where a set of tweets came from. ParseFile stamps every
// Tweet it produces with the Tweeter it was given.
type Tweeter struct {
	// Nick is presumably the author's short display name; it is not read
	// anywhere in this file — confirm against callers.
	Nick string

	// URL is reported as the "source" in ParseFile's debug log line when a
	// line cannot be parsed; presumably the address the feed was fetched from.
	URL string
}
// Tweet is a single entry parsed from a feed line by ParseFile.
type Tweet struct {
	// Tweeter is the Tweeter that was passed to ParseFile for this feed.
	Tweeter Tweeter

	// Created is the result of ParseTime on the line's leading timestamp
	// field. ParseTime yields time.Unix(0, 0) for timestamps it cannot parse,
	// so that value doubles as "unknown".
	Created time.Time

	// Text is everything on the line after the whitespace that follows the
	// timestamp field.
	Text string
}
// Tweets is a list of Tweet values. It exists as a named type so that the
// Len, Less and Swap methods below can be attached, which lets a Tweets value
// be handed to the standard sort package and ordered by creation time,
// earliest first.
type Tweets []Tweet

// Len reports how many tweets the list holds.
func (ts Tweets) Len() int {
	n := len(ts)
	return n
}

// Less reports whether the tweet at index i was created strictly before the
// tweet at index j.
func (ts Tweets) Less(i, j int) bool {
	a, b := ts[i].Created, ts[j].Created
	return a.Before(b)
}

// Swap exchanges the tweets at indexes i and j in place.
func (ts Tweets) Swap(i, j int) {
	ts[j], ts[i] = ts[i], ts[j]
}
// hashtagRe matches a '#' followed by one or more word characters or hyphens.
// In Go's regexp syntax \w is ASCII-only ([0-9A-Za-z_]), so a non-ASCII letter
// ends the tag. It is compiled once at package scope rather than on every
// Tags call; a compiled *regexp.Regexp is safe for concurrent use.
var hashtagRe = regexp.MustCompile(`#[-\w]+`)

// Tags counts the hashtags that occur in the text of all tweets.
//
// The returned map is keyed by tag name without the leading '#'; the value is
// the total number of occurrences across every tweet (a tag that appears twice
// in one tweet counts twice). Tag names are kept exactly as written, so tags
// differing only in case are counted separately. The map is never nil, and is
// empty when no tweet contains a hashtag.
func (tweets Tweets) Tags() map[string]int {
	tags := make(map[string]int)
	// Index instead of ranging by value to avoid copying each Tweet.
	for i := range tweets {
		for _, tag := range hashtagRe.FindAllString(tweets[i].Text, -1) {
			// Every match starts with exactly one '#', and the rest of
			// the match cannot contain '#', so dropping the prefix is
			// all that is needed.
			tags[strings.TrimPrefix(tag, "#")]++
		}
	}
	return tags
}
// tweetLineRe splits a feed line into three groups: the timestamp field, the
// run of whitespace after it, and the tweet text. The first group is
// non-greedy (.+?) so it ends at the first whitespace on the line. It is
// compiled once at package scope rather than on every ParseFile call; a
// compiled *regexp.Regexp is safe for concurrent use.
var tweetLineRe = regexp.MustCompile(`^(.+?)(\s+)(.+)$`)

// ParseFile reads a feed line by line from scanner and returns one Tweet per
// parseable line, in input order, each attributed to tweeter.
//
// Empty lines and lines starting with '#' are skipped. A line that does not
// consist of a timestamp field, whitespace and text is skipped as well; when
// the package-level debug flag is set, such a line is logged together with
// tweeter.URL. The timestamp field is converted with ParseTime, so a line
// whose timestamp cannot be parsed is still returned, with the fallback time
// ParseTime produces.
//
// The result is nil when no line yields a tweet. ParseFile panics if the
// scanner reports an error.
func ParseFile(scanner *bufio.Scanner, tweeter Tweeter) Tweets {
	var tweets Tweets

	for scanner.Scan() {
		line := scanner.Text()

		// Blank lines and '#' comment lines carry no tweet.
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		// "Submatch 0 is the match of the entire expression, submatch 1 the
		// match of the first parenthesized subexpression, and so on."
		// A match therefore has exactly four elements; no match yields nil.
		parts := tweetLineRe.FindStringSubmatch(line)
		if len(parts) != 4 {
			if debug {
				log.Printf("could not parse: '%s' (source:%s)\n", line, tweeter.URL)
			}
			continue
		}

		tweets = append(tweets, Tweet{
			Tweeter: tweeter,
			Created: ParseTime(parts[1]),
			Text:    parts[3],
		})
	}

	// NOTE(review): a scanner error (for example a line exceeding the
	// scanner's token size limit) aborts via panic and discards the tweets
	// already parsed. Kept as-is because callers may rely on it; consider
	// logging and returning the partial result instead.
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	return tweets
}
// ParseTime converts a twtxt timestamp string into a time.Time.
//
// The input is upper-cased before parsing, so a lower-case 't' separator or
// 'z' zone designator is accepted. The layouts below are tried in order and
// the first one that parses wins. A timestamp without a zone is interpreted
// as UTC, which is what time.Parse does in the absence of zone information.
//
// ParseTime never reports an error: if no layout matches, it returns
// time.Unix(0, 0), so callers see the Unix epoch for unparseable input.
func ParseTime(timestr string) time.Time {
	// Twtxt clients mostly emit time.RFC3339Nano, but some leave out the
	// colon in the zone offset, some leave out the zone entirely, and some
	// leave out the seconds. The fractional part is optional in every layout.
	layouts := []string{
		"2006-01-02T15:04:05.999999999Z07:00",
		"2006-01-02T15:04:05.999999999Z0700",
		"2006-01-02T15:04:05.999999999",
		"2006-01-02T15:04.999999999Z07:00",
		"2006-01-02T15:04.999999999Z0700",
		"2006-01-02T15:04.999999999",
	}

	// Upper-casing does not depend on the layout, so do it once.
	value := strings.ToUpper(timestr)
	for _, layout := range layouts {
		if tm, err := time.Parse(layout, value); err == nil {
			return tm
		}
	}
	return time.Unix(0, 0)
}