The mythical man-hour.
Got this this(pic) from programming.reddit.com. Gold.
Seeking the souce to technology and life
global reads, writes, total_ioHere's the output:
probe kernel.function("vfs_read") {
reads[execname()] += $count
}
probe kernel.function("vfs_write") {
writes[execname()] += $count
}
# print top 10 IO processes every 5 seconds
probe timer.s(5) {
foreach (name in writes)
total_io[name] += writes[name]
foreach (name in reads)
total_io[name] += reads[name]
printf ("%16s\t%10s\t%10s\n", "Process", "KB Read", "KB Written")
foreach (name in total_io- limit 10)
printf("%16s\t%10d\t%10d\n", name,
reads[name]/1024, writes[name]/1024)
delete reads
delete writes
delete total_io
print("\n")
}
Process KB Read KB Written
Xvnc 16831 3
grep 5754 3
sort 2046 0
xterm 718 19
twm 610 15
vncserver 153 0
sshd 128 0
bash 52 0
cat 33 0
yast 29 0
Labels: linux
The C++ code (yes, it's primitive,I know) I presented in my last
entry was not formatted attractively. So I researched the web for
a C++ to html formatter (in C++). I pretty soon found it in one of
the examples in the boost library regex documentation. After some
modifications,I got it to format the previous C++ code. It looks
nicer now (but it's still novice level). Here's the output:
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <set>
#include <cctype>
#include <boost/tokenizer.hpp>
#include <boost/regex.hpp>
using namespace std;
using namespace boost;
map<string,int> model;
void toLower(basic_string<char> &s) {
for (basic_string<char>::iterator p = s.begin();
p != s.end(); ++p) {
*p = tolower(*p);
}
}
map<string,int> train_model(string filename) {
ifstream in(filename.c_str());
string tmp;
regex re("[a-zA-Z]+");
map<string,int> model;
while(!in.eof()) {
getline(in, tmp, '\n');
tokenizer<> tok(tmp);
for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
if (regex_match(*beg, re)) {
string t(*beg);
toLower(t);
model[t]++;
}
}
}
return model;
}
set<string> edit1(string w) {
set<string> edit1set;
int word_len = w.length();
string alphabet("abcdefghijklmnopqrstuvwxyz");
//deletion
for(int i = 0; i < word_len; ++i)
edit1set.insert(w.substr(0,i) + w.substr(i+1,word_len));
//transposition
for(int i = 0; i < word_len-1; ++i)
edit1set.insert(w.substr(0,i) + w[i+1] + w[i] + w.substr(i+2,word_len));
// alteration
for(int i = 0; i < word_len; ++i) {
for(string::iterator si=alphabet.begin(); si != alphabet.end(); ++si) {
edit1set.insert(w.substr(0,i) + *si + w.substr(i+1,word_len));
}
}
//insertion
for(int i = 0; i < word_len+1; ++i) {
for(string::iterator si=alphabet.begin(); si != alphabet.end(); ++si) {
edit1set.insert(w.substr(0,i) + *si + w.substr(i,word_len)); }
}
return edit1set;
}
set<string> known_edit2(string w) {
set<string> ed2;
set<string> ed1 = edit1(w);
for(set<string>::iterator si = ed1.begin(); si != ed1.end(); ++si) {
set<string> tmp = edit1(*si);
for(set<string>::iterator si2 = tmp.begin(); si2 !=tmp.end(); ++si2) {
if(model.find(*si2) != model.end()) ed2.insert(*si2);
}
}
return ed2;
}
set<string> known(set<string> words) {
set<string> known_set;
for(set<string>::const_iterator si = words.begin(); si != words.end(); ++si) {
if(model.find(*si) != model.end()) known_set.insert(*si);
}
return known_set;
}
string correct(string word) {
set<string> candidates;
set<string> w;
w.insert(word);
if((candidates = known(w)).size() == 0) {
if((candidates = known(edit1(word))).size() == 0){
if((candidates = known(known_edit2(word))).size() == 0) {
return word;
}
}
}
int cur_max = 0;
string tmp;
for(set<string>::const_iterator si = candidates.begin(); si !=candidates.end(); ++si) {
if(model[*si] > cur_max) {
cur_max = model[*si];
tmp = *si;
}
}
return tmp;
}
int main(int argc, char** argv) {
model = train_model("big.txt");
string input;
while (1) {
cout << "Testing correct(), enter a word: ";
cin >> input;
cout << "correct(" << input << ")" << " returns " << correct(input) << '\n';
}
}
Labels: C++
Recently I came across Peter Norvig's latest essay on implementing a Bayesian spelling
corrector in Python. Inspired, I re-implemented something similar in C++, using it as a
chance to revise my very rusty C++ skills (see below). It's absolutely nowhere near the
elegance of Peter's python code but it seems to work which is all I can ask for.
Compiles with g++ and boost libraries (including the regex one) with -lboost_regex switch.
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <set>
#include <cctype>
#include <boost/tokenizer.hpp>
#include <boost/regex.hpp>
using namespace std;
using namespace boost;
map<string,int> model;
void toLower(basic_string<char> &s) {
for (basic_string<char>::iterator p = s.begin();
p != s.end(); ++p) {
*p = tolower(*p);
}
}
map<string,int> train_model(string filename) {
ifstream in(filename.c_str());
string tmp;
regex re("[a-zA-Z]+");
map<string,int> model;
while(!in.eof()) {
getline(in, tmp, '\n');
tokenizer<> tok(tmp);
for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
if (regex_match(*beg, re)) {
string t(*beg);
toLower(t);
model[t]++;
}
}
}
return model;
}
set<string> edit1(string w) {
set<string> edit1set;
int word_len = w.length();
string alphabet("abcdefghijklmnopqrstuvwxyz");
//deletion
for(int i = 0; i < word_len; ++i)
edit1set.insert(w.substr(0,i) + w.substr(i+1,word_len));
//transposition
for(int i = 0; i < word_len-1; ++i)
edit1set.insert(w.substr(0,i) + w[i+1] + w[i] + w.substr(i+2,word_len));
// alteration
for(int i = 0; i < word_len; ++i) {
for(string::iterator si=alphabet.begin(); si != alphabet.end(); ++si) {
edit1set.insert(w.substr(0,i) + *si + w.substr(i+1,word_len));
}
}
//insertion
for(int i = 0; i < word_len+1; ++i) {
for(string::iterator si=alphabet.begin(); si != alphabet.end(); ++si) {
edit1set.insert(w.substr(0,i) + *si + w.substr(i,word_len)); }
}
return edit1set;
}
set<string> known_edit2(string w) {
set<string> ed2;
set<string> ed1 = edit1(w);
for(set<string>::iterator si = ed1.begin(); si != ed1.end(); ++si) {
set<string> tmp = edit1(*si);
for(set<string>::iterator si2 = tmp.begin(); si2 !=tmp.end(); ++si2) {
if(model.find(*si2) != model.end()) ed2.insert(*si2);
}
}
return ed2;
}
set<string> known(set<string> words) {
set<string> known_set;
for(set<string>::const_iterator si = words.begin(); si != words.end(); ++si) {
if(model.find(*si) != model.end()) known_set.insert(*si);
}
return known_set;
}
string correct(string word) {
set<string> candidates;
set<string> w;
w.insert(word);
if((candidates = known(w)).size() == 0) {
if((candidates = known(edit1(word))).size() == 0){
if((candidates = known(known_edit2(word))).size() == 0) {
return word;
}
}
}
int cur_max = 0;
string tmp;
for(set<string>::const_iterator si = candidates.begin(); si !=candidates.end(); ++si) {
if(model[*si] > cur_max) {
cur_max = model[*si];
tmp = *si;
}
}
return tmp;
}
int main(int argc, char** argv) {
model = train_model("big.txt");
string input;
while (1) {
cout << "Testing correct(), enter a word: ";
cin >> input;
cout << "correct(" << input << ")" << " returns " << correct(input) << '\n';
}
}