-
Notifications
You must be signed in to change notification settings - Fork 0
/
optimized.nim
83 lines (71 loc) · 2.36 KB
/
optimized.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from tables import initCountTable, inc, sort, pairs
from algorithm import SortOrder, sort
from hashes import Hash, `!$`, `!&`
proc hash*(x: string): Hash {.inline.} =
# FNV-1a 64
var h = 0x100000001b3'u64
for c in x:
h = h xor cast[uint64](c)
h *= 0x84222325'u64
return cast[Hash](h)
# Workaround for superfluous copying on table insertion by @alaviss
template `^^`(s, i: untyped): untyped =
(when i is BackwardsIndex: s.len - int(i) else: int(i))
template `{}`[T, U](s: string, slice: HSlice[T, U]): untyped =
s.toOpenArray(s ^^ slice.a, s ^^ slice.b)
func copy(s: var string, data: openArray[char]) {.inline.} =
s.setLen(data.len())
for idx, c in data:
s[idx] = c
# end of workaround
template isWhitespace(c: char): untyped = (c <= ' ')
template notWhitespace(c: char): untyped = (c > ' ')
proc main() =
const bufLen = 64 * 1024
var
words = initCountTable[string]()
buf = newString(bufLen)
offset = 0
wordBuf: string
while true:
# Fill buffer starting from offset (remainder from prev. iteration)
let totalBytes = offset + stdin.readChars(buf{offset .. ^1})
if totalBytes == 0:
break
# Scan the buffer for the last word bound
# to process only up to the potential partial at the end
var lastChar = totalBytes - 1 # an index, never < 0
while lastChar > 0 and buf[lastChar].notWhitespace():
lastChar.dec()
offset = totalBytes - lastChar - 1
# Split buffer into words and process
var
inside = false
wstart, wend = 0
while wstart <= lastChar:
if inside:
if buf[wend].isWhitespace(): # leaving word
inside = false
wordBuf.copy(buf{wstart..<wend})
words.inc(wordBuf)
wstart = wend + 1
continue
else: # traversing a word
if buf[wend] in {'A'..'Z'}:
buf[wend] = chr(ord(buf[wend]) + (ord('a') - ord('A')))
wend.inc()
else:
if buf[wstart].isWhitespace(): # skipping whitespace
wstart.inc()
continue
else: # entering a word
inside = true
wend = wstart
if offset > 0:
# if offset is positive, overflow is impossible
moveMem(buf[0].addr(), buf[lastChar + 1].addr(), offset)
words.sort(SortOrder.Descending)
for (word, count) in words.pairs():
echo(word, ' ', count)
when isMainModule:
main()