`

DropBox Interview - Same file in a directory

 
阅读更多

DropBox interview questions:

Find all files under a directory (including its subdirectories) that have identical content.

/**
 * Reports whether `path` names a directory.
 *
 * The check is done with lstat(), so a symbolic link is examined itself
 * rather than the object it points to.
 *
 * @param path  Path to inspect.
 * @return true when lstat() succeeds and the entry's mode is a directory;
 *         false otherwise, including when lstat() fails.
 */
bool isDir(const char* path) {
    struct stat st;
    // On failure lstat() leaves `st` unspecified, so it must not be inspected.
    if (lstat(path, &st) != 0) {
        return false;
    }
    return 0 != S_ISDIR(st.st_mode);
}
 
/**
 * Computes a 64-bit hash of a file's contents.
 *
 * Starting from 0, every byte of the file is folded into the running value
 * with hashFunc() (defined elsewhere), in file order.
 *
 * @param path  Path of the file to hash.
 * @return The accumulated hash, or 0 if the file cannot be opened or a read
 *         error occurs. A file with no bytes also yields 0, because the
 *         initial value is never updated.
 */
uint64_t getHash(string& path) {
    // Binary mode: the raw bytes are hashed, so no text-mode translation
    // (e.g. of line endings) may be applied by the C library.
    FILE *fp = fopen(path.c_str(), "rb");
    if (fp == NULL) {
        cerr << " Can not open the file " << path << endl;
        return 0;
    }
    uint64_t hash_key = 0;
    // Read in chunks instead of issuing one fread() call per byte.
    char buf[4096];
    size_t got = 0;
    while ((got = fread(buf, 1, sizeof(buf), fp)) > 0) {
        for (size_t i = 0; i < got; i++) {
            hash_key = hashFunc(hash_key, buf[i]);
        }
    }
    // A short read caused by an error would leave a hash of only part of the
    // file, which could falsely match another file; report failure instead.
    const bool readFailed = ferror(fp) != 0;
    fclose(fp);
    if (readFailed) {
        cerr << " Can not read the file " << path << endl;
        return 0;
    }
    return hash_key;
}
 
/**
 * Recursively walks the directory `path` and groups file paths by content hash.
 *
 * Each entry other than "." and ".." is joined to `path` with "/". Entries
 * that isDir() reports as directories are descended into; every other entry
 * is hashed with getHash() and its path appended to hash[hashValue].
 *
 * @param path  Directory to scan. If it cannot be opened, nothing happens.
 * @param hash  Output map from hash value to the paths that produced it.
 *              Paths whose hash value is 0 are not recorded.
 */
void getSimilarFiles(string& path, unordered_map<uint64_t, vector<string> >& hash) {
    DIR* dirFile = opendir(path.c_str());
    if (dirFile == NULL)    return;
    struct dirent* hFile;
    while ((hFile = readdir(dirFile)) != NULL) {
        // Skip the self and parent entries to avoid infinite recursion.
        if (!strcmp(hFile->d_name, ".")) continue;
        if (!strcmp(hFile->d_name, ".."))    continue;
        string srcPath(path);
        srcPath.append("/");
        srcPath.append(hFile->d_name);
        if (isDir(srcPath.c_str())) {
            getSimilarFiles(srcPath, hash);
        } else {
            uint64_t hashValue = getHash(srcPath);
            // A zero hash is treated as "nothing usable" and is skipped.
            if (hashValue) {
                hash[hashValue].push_back(srcPath);
            }
        }
    }
    // Release the directory stream; without this every visited directory
    // leaks its handle.
    closedir(dirFile);
}
 
/**
 * Prints every group of paths that share a hash value.
 *
 * For each map entry holding more than one path, a separator line is written
 * to standard output followed by each path on its own line. Entries with zero
 * or one path are not printed.
 *
 * @param hash  Map from hash value to the paths that produced it.
 */
void printHash(unordered_map<uint64_t, vector<string> >& hash) {
    typedef unordered_map<uint64_t, vector<string> >::iterator GroupIter;
    typedef vector<string>::iterator PathIter;

    for (GroupIter group = hash.begin(); group != hash.end(); ++group) {
        vector<string>& paths = group->second;
        if (paths.size() > 1) {
            cout << "=========the files below are the same" << endl;
            for (PathIter p = paths.begin(); p != paths.end(); ++p) {
                cout << *p << endl;
            }
        }
    }
}
 
/**
 * Entry point: scans the directory tree rooted at `pathInput` and prints the
 * groups of files whose content hashes are equal.
 *
 * @param pathInput  NUL-terminated path of the directory to scan. A null
 *                   pointer is ignored and nothing is printed.
 */
void getFileSet(const char* pathInput) {
    // Constructing std::string from a null pointer is undefined behavior.
    if (pathInput == NULL) {
        return;
    }
    unordered_map<uint64_t, vector<string> > hash;
    string path(pathInput);
    getSimilarFiles(path, hash);
    printHash(hash);
}

 

From:

https://linzhongzl.wordpress.com/2014/10/29/same-file/ 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics