Converting C++ code to HTML
The C++ code (yes, it's primitive, I know) I presented in my last
entry was not formatted attractively. So I searched the web for
a C++-to-HTML formatter (written in C++). I soon found one among
the examples in the Boost regex library documentation. After some
modifications, I got it to format the previous C++ code. It looks
nicer now (though it's still novice-level). Here's the output:
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <set>
#include <cctype>
#include <boost/tokenizer.hpp>
#include <boost/regex.hpp>
using namespace std;
using namespace boost;
// Global word-frequency table: maps a word to an integer count.
// Assigned in main() from the result of train_model() and consulted by
// known(), known_edit2() and correct().
map<string,int> model;
/**
 * Lower-cases a string in place, one character at a time.
 *
 * @param s  String to modify; every character is replaced by the result of
 *           tolower() for that character.
 */
void toLower(std::string &s) {
    for (std::string::iterator p = s.begin(); p != s.end(); ++p) {
        // tolower() requires an argument representable as unsigned char (or
        // EOF); passing a negative plain char is undefined behaviour, so
        // convert through unsigned char first.
        *p = static_cast<char>(tolower(static_cast<unsigned char>(*p)));
    }
}
map<string,int> train_model(string filename) {
ifstream in(filename.c_str());
string tmp;
regex re("[a-zA-Z]+");
map<string,int> model;
while(!in.eof()) {
getline(in, tmp, '\n');
tokenizer<> tok(tmp);
for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
if (regex_match(*beg, re)) {
string t(*beg);
toLower(t);
model[t]++;
}
}
}
return model;
}
/**
 * Generates every string that can be produced from w by one simple edit.
 *
 * The edits are: deleting one character, swapping two adjacent characters,
 * replacing one character with a lowercase letter a-z, and inserting a
 * lowercase letter a-z at any position (including both ends).
 *
 * @param w  The source word.
 * @return The set of all resulting strings (duplicates collapse; w itself is
 *         included whenever a replacement reproduces the original character).
 */
std::set<std::string> edit1(std::string w) {
    const std::string letters("abcdefghijklmnopqrstuvwxyz");
    const int n = static_cast<int>(w.length());
    std::set<std::string> result;

    // Walk over every split point w = head + tail and apply each edit kind
    // that is valid at that position.
    for (int pos = 0; pos <= n; ++pos) {
        const std::string head = w.substr(0, pos);
        const std::string tail = w.substr(pos);

        // Insertion: possible at every split point, including the very end.
        for (std::string::size_type k = 0; k < letters.size(); ++k) {
            result.insert(head + letters[k] + tail);
        }

        if (pos < n) {
            const std::string rest = tail.substr(1);

            // Deletion: drop the first character of tail.
            result.insert(head + rest);

            // Alteration: replace the first character of tail.
            for (std::string::size_type k = 0; k < letters.size(); ++k) {
                result.insert(head + letters[k] + rest);
            }
        }

        if (pos < n - 1) {
            // Transposition: swap the first two characters of tail.
            result.insert(head + tail[1] + tail[0] + tail.substr(2));
        }
    }
    return result;
}
set<string> known_edit2(string w) {
set<string> ed2;
set<string> ed1 = edit1(w);
for(set<string>::iterator si = ed1.begin(); si != ed1.end(); ++si) {
set<string> tmp = edit1(*si);
for(set<string>::iterator si2 = tmp.begin(); si2 !=tmp.end(); ++si2) {
if(model.find(*si2) != model.end()) ed2.insert(*si2);
}
}
return ed2;
}
/**
 * Filters a set of words down to those that are keys in the global model.
 *
 * @param words  Candidate words.
 * @return The subset of words present in model.
 */
std::set<std::string> known(std::set<std::string> words) {
    std::set<std::string> present;
    std::set<std::string>::const_iterator it = words.begin();
    while (it != words.end()) {
        if (model.count(*it) != 0) {
            present.insert(*it);
        }
        ++it;
    }
    return present;
}
/**
 * Suggests a spelling for word using the global model.
 *
 * Candidate sets are tried in order and the first non-empty one is used:
 * the word itself if it is in the model, then known one-edit variants, then
 * known two-edit variants. If all are empty the input is returned unchanged.
 *
 * @param word  The word to correct.
 * @return The candidate with the largest count in model. Because candidates
 *         are visited in the set's sorted order and the comparison is strict,
 *         the first candidate wins a tie. If no candidate has a count above
 *         zero the result is an empty string.
 */
std::string correct(std::string word) {
    std::set<std::string> self;
    self.insert(word);

    // Fall through progressively wider candidate sets until one is non-empty.
    std::set<std::string> candidates = known(self);
    if (candidates.empty()) {
        candidates = known(edit1(word));
    }
    if (candidates.empty()) {
        candidates = known(known_edit2(word));
    }
    if (candidates.empty()) {
        return word;
    }

    std::string best;
    int best_count = 0;
    for (std::set<std::string>::const_iterator it = candidates.begin();
         it != candidates.end(); ++it) {
        const int count = model[*it];
        if (count > best_count) {
            best_count = count;
            best = *it;
        }
    }
    return best;
}
/**
 * Interactive driver: trains the global model, then repeatedly prompts for a
 * word and prints the result of correct() for it.
 *
 * @param argc  Argument count.
 * @param argv  Optional argv[1] is the path of the training text; when it is
 *              absent the file "big.txt" is used.
 * @return 0 once standard input is exhausted or can no longer be read.
 */
int main(int argc, char** argv) {
    const char* corpus = (argc > 1) ? argv[1] : "big.txt";
    model = train_model(corpus);

    std::string input;
    while (true) {
        std::cout << "Testing correct(), enter a word: ";
        // Stop when extraction fails (EOF or stream error); otherwise the
        // loop would keep printing the prompt forever with a stale input.
        if (!(std::cin >> input)) {
            break;
        }
        std::cout << "correct(" << input << ")" << " returns " << correct(input) << '\n';
    }
    return 0;
}
Labels: C++

1 Comments:
Good for people to know.
Post a Comment
Subscribe to Post Comments [Atom]
<< Home