123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336 |
- /*
- * DUplicate FInd 0.1
- *
- * Desc: Finds duplicate files and deletes them.
- * Author: Nikola Kotur <kotnik@ns-linux.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- */
#include <sys/types.h>
#include <sys/dir.h>
#include <sys/param.h>
#include <sys/stat.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <openssl/evp.h>
/*
 * Boolean constants used by the directory-entry filter.
 * TRUE is fully parenthesized so that it expands safely inside any larger
 * expression.
 */
#define FALSE 0
#define TRUE (!FALSE)

/*
 * alphasort() and errno are deliberately NOT redeclared here.  The system
 * headers included at the top of this file (<sys/dir.h> and <errno.h>) are
 * expected to declare them, and declaring errno by hand is undefined
 * behaviour because the implementation is allowed to make it a macro.
 */
/*
 * One node of the singly linked list of regular files found while scanning.
 *
 * The struct now carries the tag FILESTRUCT, so the `next` member really
 * points to another node of the same type.  Without the tag,
 * `struct FILESTRUCT` was an unrelated incomplete type, which is why the rest
 * of the file has to cast on every link access.  Those existing casts remain
 * valid; they are simply no longer required.
 */
typedef struct FILESTRUCT {
    char name[MAXPATHLEN];          /* directory path + "/" + entry name */
    char short_name[FILENAME_MAX];  /* entry name only, used in progress output */
    size_t size;                    /* file size in bytes as reported by stat() */
    char md5[32];                   /* not referenced by the code in this file */
    int delete;                     /* non-zero once the file is flagged as a duplicate */
    struct FILESTRUCT *next;        /* following node; list walkers stop at NULL */
} FILESTRUCT;
- int opt_delete=0;
- int is_dupes=0;
- int opt_recursive=0;
- int total_scaned=0, total_deleted=0;
- FILESTRUCT *file_first=NULL;
- FILESTRUCT *file_temp=NULL;
- FILESTRUCT *file_curr=NULL;
/*
 * Forward declarations.
 *
 * Every declaration is a real prototype: an empty "()" parameter list tells
 * the compiler nothing about the arguments, so calls could not be checked.
 */
void usage(void);
int scan_dir(char *directory, int delete);
void delete_list(void);
void find_dupes(void);
void delete_dupes(void);
void print_list(void);
int check_md5(char *file1, char *file2, char *short1, char *short2);
void clear_buffer(char *buffer, int size);
void add_directory(char *dir_name);
- int main(int argc, char *argv[])
- {
- char *opt_dir = NULL; /* Holds a name of start dir */
- extern char *optarg;
- extern int optind, optopt;
- int err_flag=0, c;
- if (argc == 1) {
- opt_delete = 0;
- } else {
- while ((c = getopt(argc, argv, ":drf:")) != -1) {
- switch (c) {
- case 'd':
- opt_delete = 1;
- break;
- case 'f':
- opt_dir = optarg;
- break;
- case 'r':
- opt_recursive = 1;
- break;
- case ':':
- fprintf(stderr, "dufi: Option -%c requires an operand\n", optopt);
- err_flag++;
- break;
- case '?':
- fprintf(stderr, "dufi: Unrecognized -%c option\n", optopt);
- err_flag++;
- break;
- }
- }
- if (err_flag) {
- usage();
- exit(2);
- }
- }
- if (!opt_dir) {
- opt_dir = getenv("PWD");
- }
- return scan_dir(opt_dir, opt_delete);
- }
/*
 * Print the command-line help text to standard output.
 */
void usage(void) {
    static const char *const help_lines[] = {
        "Dufi: deletes duplicate files.\n\n",
        "Usage:\n",
        "dufi [-d] [-r] [-f /path/to/directory]\n",
        "\t-d\tif specified, files will be deleted\n",
        "\t-r\ttraverse into subdirectories\n",
        "\t-f\tstarting directory\n"
    };
    size_t idx;

    for (idx = 0; idx < sizeof help_lines / sizeof help_lines[0]; idx++) {
        fputs(help_lines[idx], stdout);
    }
}
- void add_directory(char *dir_name) {
- int res, count, i;
- int file_select();
- char full_name[MAXPATHLEN];
- struct stat filestat;
- struct direct **files;
- FILESTRUCT *file_new;
- res = chdir(dir_name);
- if (res == -1) {
- fprintf(stderr, "dufi: Could not open: %s\n", dir_name);
- } else {
- count = scandir(dir_name, &files, file_select, alphasort);
- for (i=1; i<count+1; ++i) {
- strcpy(full_name, dir_name);
- strcat(full_name, "/");
- strcat(full_name, files[i-1]->d_name);
- stat(full_name, &filestat);
- if (S_ISREG(filestat.st_mode)) { /* If it's a file */
- total_scaned += 1;
- /* Make a new space */
- file_new = (FILESTRUCT *) malloc(sizeof(FILESTRUCT));
- /* Put data into it */
- strcpy(file_new->name, full_name);
- strcpy(file_new->short_name, files[i-1]->d_name);
- file_new->size = filestat.st_size;
- file_new->delete = 0;
- /* Make a link to previous */
- if (file_curr) {
- file_curr->next = (struct FILESTRUCT *) file_new;
- file_curr = file_new;
- } else {
- file_first = file_new;
- file_curr = file_new;
- }
- } else if (S_ISDIR(filestat.st_mode) && opt_recursive) { /* If it's a dir */
- add_directory(full_name);
- }
- }
- }
- }
- int scan_dir(char *directory, int delete) {
- add_directory(directory);
- printf("\n");
- find_dupes();
- if (is_dupes)
- delete_dupes();
- else
- printf("No duplicates.\n");
- delete_list();
- return 0;
- }
/*
 * Directory-entry filter passed to scandir().
 *
 * entry: the directory entry under consideration.
 *
 * Returns 0 for the "." and ".." entries and 1 for every other name.
 *
 * The parameter is const-qualified so the function has the exact type
 * scandir() expects for its filter callback.
 */
int file_select(const struct direct *entry) {
    const char *name = entry->d_name;

    return !(strcmp(name, ".") == 0 || strcmp(name, "..") == 0);
}
- void delete_list() {
- FILESTRUCT *curr;
- while (file_first->next) {
- curr = file_first;
- file_first = (FILESTRUCT *) file_first->next;
- free(curr);
- }
- free(file_first);
- }
- void print_list() {
- file_temp = file_first;
- while (file_temp != NULL) {
- printf("Name: %s Size: %d Delete: ", file_temp->name, file_temp->size);
- if(file_temp->delete)
- printf("Y\n");
- else
- printf("N\n");
- file_temp = (FILESTRUCT *) file_temp->next;
- }
- }
- void find_dupes() {
- FILESTRUCT *new_first;
- file_temp = file_first;
- while (file_temp) {
- if(!file_temp->delete) {
- new_first = (FILESTRUCT *) file_temp->next;
- while (new_first) {
- if ((file_temp->size == new_first->size)
- && check_md5(file_temp->name, new_first->name,
- file_temp->short_name, new_first->short_name)) {
- new_first->delete = 1;
- is_dupes = 1;
- total_deleted += 1;
- }
- new_first = (FILESTRUCT *) new_first->next;
- }
- }
- file_temp = (FILESTRUCT *) file_temp->next;
- }
- }
- void delete_dupes() {
- file_temp = file_first;
- size_t total_size = 0, total_kb, total_mb;
- printf("\nTotal files scaned: %d\n", total_scaned);
- if (total_deleted)
- printf("Total duplicates: %d\n\n", total_deleted);
- if (opt_delete && is_dupes)
- printf("Deleting:\n");
- else if (is_dupes)
- printf("Would delete:\n");
- while (file_temp) {
- if (file_temp->delete) {
- printf("%s\n", file_temp->name);
- if (opt_delete) {
- if (unlink(file_temp->name) == -1)
- printf("\tCouldn't delete: %s\n", file_temp->name);
- total_size += file_temp->size;
- }
- }
- file_temp = (FILESTRUCT *) file_temp->next;
- }
- if (opt_delete && is_dupes) {
- total_kb = total_size/1024;
- total_mb = total_kb/1024;
- printf("\nTotal freed: ");
- if (total_mb >= 1)
- printf("%d Mb\n", total_mb);
- else if (total_kb >= 1)
- printf("%d Kb\n", total_kb);
- else
- printf("%d b\n", total_size);
- }
- }
- int check_md5(char *file1, char *file2, char *short1, char *short2) {
- EVP_MD_CTX mdctx;
- const EVP_MD *md;
- unsigned char md_value1[EVP_MAX_MD_SIZE], md_value2[EVP_MAX_MD_SIZE];
- int md_len, i, equals;
- FILE *stream;
- char buffer[2048];
- printf("Found a match! Checking MD5 for: %s <==> %s ...", short1, short2);
- clear_buffer(buffer, 2048);
- OpenSSL_add_all_digests ();
- md = EVP_get_digestbyname("md5");
- EVP_MD_CTX_init (&mdctx);
- EVP_DigestInit_ex (&mdctx, md, NULL);
- if((stream = fopen(file1, "r")) == NULL) {
- printf ("\nCant open file %s. Error is: %d\n", file1, errno);
- exit(1);
- }
- while(fgets(buffer, 2048, stream)) {
- EVP_DigestUpdate (&mdctx, buffer, 2048);
- clear_buffer(buffer, 2048);
- }
- EVP_DigestFinal_ex (&mdctx, md_value1, &md_len);
- EVP_MD_CTX_cleanup (&mdctx);
- fclose(stream);
- EVP_MD_CTX_init (&mdctx);
- EVP_DigestInit_ex (&mdctx, md, NULL);
- if((stream = fopen(file2, "r")) == NULL) {
- printf ("\nCant open file %s. Error is: %d\n", file1, errno);
- exit(1);
- }
- while(fgets(buffer, 2048, stream)) {
- EVP_DigestUpdate (&mdctx, buffer, 2048);
- clear_buffer(buffer, 2048);
- }
- EVP_DigestFinal_ex (&mdctx, md_value2, &md_len);
- EVP_MD_CTX_cleanup (&mdctx);
- fclose(stream);
- equals = 1;
- for (i = 0; i < md_len; i++)
- if (md_value1[i] != md_value2[i])
- equals=0;
-
- if(equals) {
- printf("MATCH!\n");
- return 1;
- } else {
- printf("\n");
- return 0;
- }
- }
/*
 * Zero the first `size` bytes of `buffer`.
 * Nothing is written when `size` is zero or negative.
 */
void clear_buffer(char *buffer, int size) {
    char *cursor = buffer;
    int remaining = size;

    while (remaining > 0) {
        *cursor = 0;
        cursor++;
        remaining--;
    }
}
|