/* * DUplicate FInd 0.1 * * Desc: Finds duplicate files and deletes them. * Author: Nikola Kotur * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * */ #include #include #include #include #include #include #include #include #include #define FALSE 0 #define TRUE !FALSE extern int alphasort(); extern int errno; typedef struct { char name[MAXPATHLEN]; char short_name[FILENAME_MAX]; size_t size; char md5[32]; int delete; struct FILESTRUCT *next; } FILESTRUCT; int opt_delete=0; int is_dupes=0; int opt_recursive=0; int total_scaned=0, total_deleted=0; FILESTRUCT *file_first=NULL; FILESTRUCT *file_temp=NULL; FILESTRUCT *file_curr=NULL; void usage(void); int scan_dir(char *, int); void delete_list(); void find_dupes(); void delete_dupes(); void print_list(); int check_md5(char *, char *, char *, char *); void clear_buffer(char *, int); void add_directory(char *); int main(int argc, char *argv[]) { char *opt_dir = NULL; /* Holds a name of start dir */ extern char *optarg; extern int optind, optopt; int err_flag=0, c; if (argc == 1) { opt_delete = 0; } else { while ((c = getopt(argc, argv, ":drf:")) != -1) { switch (c) { case 'd': opt_delete = 1; break; case 'f': opt_dir = optarg; break; case 'r': opt_recursive = 1; break; case ':': fprintf(stderr, "dufi: Option -%c requires an operand\n", optopt); err_flag++; break; case '?': fprintf(stderr, "dufi: Unrecognized -%c option\n", optopt); err_flag++; break; } } if (err_flag) { usage(); exit(2); } } if (!opt_dir) { opt_dir = getenv("PWD"); } return scan_dir(opt_dir, opt_delete); } void usage(void) { printf("Dufi: deletes duplicate files.\n\n"); printf("Usage:\n"); printf("dufi [-d] [-r] [-f /path/to/directory]\n"); printf("\t-d\tif specified, files will be deleted\n"); printf("\t-r\ttraverse into subdirectories\n"); printf("\t-f\tstarting directory\n"); } void add_directory(char *dir_name) { int res, count, i; int file_select(); char full_name[MAXPATHLEN]; struct stat filestat; struct direct **files; FILESTRUCT *file_new; res = chdir(dir_name); if (res == -1) { fprintf(stderr, "dufi: Could not open: %s\n", dir_name); } else { count = scandir(dir_name, &files, file_select, alphasort); for (i=1; id_name); stat(full_name, &filestat); if (S_ISREG(filestat.st_mode)) { /* If it's a file */ total_scaned += 1; /* Make a new space */ file_new = (FILESTRUCT *) malloc(sizeof(FILESTRUCT)); /* Put data into it */ strcpy(file_new->name, full_name); strcpy(file_new->short_name, files[i-1]->d_name); file_new->size = filestat.st_size; file_new->delete = 0; /* Make a link to previous */ if (file_curr) { file_curr->next = (struct FILESTRUCT *) file_new; file_curr = file_new; } else { file_first = file_new; file_curr = file_new; } } else if (S_ISDIR(filestat.st_mode) && opt_recursive) { /* If it's a dir */ add_directory(full_name); } } } } int scan_dir(char *directory, int delete) { add_directory(directory); printf("\n"); find_dupes(); if (is_dupes) delete_dupes(); else printf("No duplicates.\n"); delete_list(); return 0; } int file_select(struct direct *entry) { if ((strcmp(entry->d_name, ".") == 0) || (strcmp(entry->d_name, "..") == 0)) return FALSE; else return TRUE; } void delete_list() { FILESTRUCT *curr; while (file_first->next) { curr = file_first; file_first = (FILESTRUCT *) file_first->next; free(curr); } free(file_first); } void print_list() { file_temp = file_first; while (file_temp != NULL) { printf("Name: %s Size: %d Delete: ", file_temp->name, file_temp->size); if(file_temp->delete) printf("Y\n"); else printf("N\n"); file_temp = (FILESTRUCT *) file_temp->next; } } void find_dupes() { FILESTRUCT *new_first; file_temp = file_first; while (file_temp) { if(!file_temp->delete) { new_first = (FILESTRUCT *) file_temp->next; while (new_first) { if ((file_temp->size == new_first->size) && check_md5(file_temp->name, new_first->name, file_temp->short_name, new_first->short_name)) { new_first->delete = 1; is_dupes = 1; total_deleted += 1; } new_first = (FILESTRUCT *) new_first->next; } } file_temp = (FILESTRUCT *) file_temp->next; } } void delete_dupes() { file_temp = file_first; size_t total_size = 0, total_kb, total_mb; printf("\nTotal files scaned: %d\n", total_scaned); if (total_deleted) printf("Total duplicates: %d\n\n", total_deleted); if (opt_delete && is_dupes) printf("Deleting:\n"); else if (is_dupes) printf("Would delete:\n"); while (file_temp) { if (file_temp->delete) { printf("%s\n", file_temp->name); if (opt_delete) { if (unlink(file_temp->name) == -1) printf("\tCouldn't delete: %s\n", file_temp->name); total_size += file_temp->size; } } file_temp = (FILESTRUCT *) file_temp->next; } if (opt_delete && is_dupes) { total_kb = total_size/1024; total_mb = total_kb/1024; printf("\nTotal freed: "); if (total_mb >= 1) printf("%d Mb\n", total_mb); else if (total_kb >= 1) printf("%d Kb\n", total_kb); else printf("%d b\n", total_size); } } int check_md5(char *file1, char *file2, char *short1, char *short2) { EVP_MD_CTX mdctx; const EVP_MD *md; unsigned char md_value1[EVP_MAX_MD_SIZE], md_value2[EVP_MAX_MD_SIZE]; int md_len, i, equals; FILE *stream; char buffer[2048]; printf("Found a match! Checking MD5 for: %s <==> %s ...", short1, short2); clear_buffer(buffer, 2048); OpenSSL_add_all_digests (); md = EVP_get_digestbyname("md5"); EVP_MD_CTX_init (&mdctx); EVP_DigestInit_ex (&mdctx, md, NULL); if((stream = fopen(file1, "r")) == NULL) { printf ("\nCant open file %s. Error is: %d\n", file1, errno); exit(1); } while(fgets(buffer, 2048, stream)) { EVP_DigestUpdate (&mdctx, buffer, 2048); clear_buffer(buffer, 2048); } EVP_DigestFinal_ex (&mdctx, md_value1, &md_len); EVP_MD_CTX_cleanup (&mdctx); fclose(stream); EVP_MD_CTX_init (&mdctx); EVP_DigestInit_ex (&mdctx, md, NULL); if((stream = fopen(file2, "r")) == NULL) { printf ("\nCant open file %s. Error is: %d\n", file1, errno); exit(1); } while(fgets(buffer, 2048, stream)) { EVP_DigestUpdate (&mdctx, buffer, 2048); clear_buffer(buffer, 2048); } EVP_DigestFinal_ex (&mdctx, md_value2, &md_len); EVP_MD_CTX_cleanup (&mdctx); fclose(stream); equals = 1; for (i = 0; i < md_len; i++) if (md_value1[i] != md_value2[i]) equals=0; if(equals) { printf("MATCH!\n"); return 1; } else { printf("\n"); return 0; } } void clear_buffer(char *buffer, int size) { int i; for (i=0; i