//sort vertices by degrees //input line format: vid \t num nb1 nb2 ... //output line format: order vid \t degree //we do not hold vertex info, like adjacency list, since it will cause load balance issue due to range-based partitioning //then use a program to extract top-k, and report the degree threshold (and adjusted k if ties are observed) #include "utils/TeraSort.h" #include "utils/type.h" double sampRate=0.001; class DegreeSort:public TeraWorker { char tmp[100]; public: DegreeSort():TeraWorker(sampRate, true){} virtual TeraItem* toVertex(char* line) { TeraItem* v=new TeraItem; char * pch; pch=strtok(line, "\t"); int id=atoi(pch); v->key.v2=id; pch=strtok(NULL, " "); int deg=atoi(pch); v->key.v1=-deg;//use negative degree to put large degree vertices to the head return v; } virtual void toline(TeraItem* v) { sprintf(tmp, "%d\t%d\n", v->key.v2, -v->key.v1); write(tmp); } }; int main(int argc, char* argv[]){ WorkerParams param; param.input_path="/toy_ug"; param.output_path="/sort"; param.force_write=true; param.native_dispatcher=false; DegreeSort worker; worker.run(param); return 0; }