-
Notifications
You must be signed in to change notification settings - Fork 0
/
TextQuery.cpp
117 lines (98 loc) · 3.42 KB
/
TextQuery.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include "TextQuery.h"
//#include "make_plural.h"
#include <cstddef>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <utility>
using namespace std;
// read the input file and build the map of lines to line numbers
TextQuery::TextQuery(ifstream &is): file(new vector<string>)
{
string text;
while (getline(is, text)) { // for each line in the file
file->push_back(text); // remember this line of text
int n = file->size() - 1; // the current line number
istringstream line(text); // separate the line into words
string word;
while (line >> word) { // for each word in that line
word = cleanup_str(word);
// if word isn't already in wm, subscripting adds a new entry
auto &lines = wm[word]; // lines is a shared_ptr
if (!lines) // that pointer is null the first time we see word
lines.reset(new set<line_no>); // allocate a new set
lines->insert(n); // insert this line number
}
}
}
// not covered in the book -- cleanup_str removes
// punctuation and converts all text to lowercase so that
// the queries operate in a case insensitive manner
string TextQuery::cleanup_str(const string &word)
{
string ret;
for (auto it = word.begin(); it != word.end(); ++it) {
if (!ispunct(*it))
ret += tolower(*it);
}
return ret;
}
QueryResult
TextQuery::query(const string &sought) const
{
// we'll return a pointer to this set if we don't find sought
static shared_ptr<set<line_no>> nodata(new set<line_no>);
// use find and not a subscript to avoid adding words to wm!
auto loc = wm.find(cleanup_str(sought));
if (loc == wm.end())
return QueryResult(sought, nodata, file); // not found
else
return QueryResult(sought, loc->second, file);
}
inline
string make_plural(size_t ctr, const string &word,
const string &ending)
{
return (ctr > 1) ? word + ending : word;
}
ostream &print(ostream & os, const QueryResult &qr)
{
// if the word was found, print the count and all occurrences
os << qr.sought << " occurs " << qr.lines->size() << " "
<< make_plural(qr.lines->size(), "time", "s") << endl;
// print each line in which the word appeared
for (auto num : *qr.lines) // for every element in the set
// don't confound the user with text lines starting at 0
os << "\t(line " << num + 1 << ") "
<< *(qr.file->begin() + num) << endl;
return os;
}
// debugging routine, not covered in the book
void TextQuery::display_map()
{
auto iter = wm.cbegin(), iter_end = wm.cend();
// for each word in the map
for ( ; iter != iter_end; ++iter) {
cout << "word: " << iter->first << " {";
// fetch location vector as a const reference to avoid copying it
auto text_locs = iter->second;
auto loc_iter = text_locs->cbegin(),
loc_iter_end = text_locs->cend();
// print all line numbers for this word
while (loc_iter != loc_iter_end)
{
cout << *loc_iter;
if (++loc_iter != loc_iter_end)
cout << ", ";
}
cout << "}\n"; // end list of output this word
}
cout << endl; // finished printing entire map
}