|
@@ -0,0 +1,336 @@
|
|
|
+/*
|
|
|
+ * DUplicate FInd 0.1
|
|
|
+ *
|
|
|
+ * Desc: Finds duplicate files and deletes them.
|
|
|
+ * Author: Nikola Kotur <kotnik@ns-linux.org>
|
|
|
+ *
|
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
|
+ * (at your option) any later version.
|
|
|
+ *
|
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
+ * GNU General Public License for more details.
|
|
|
+ *
|
|
|
+ * You should have received a copy of the GNU General Public License
|
|
|
+ * along with this program; if not, write to the Free Software
|
|
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
+ *
|
|
|
+ */
|
|
|
+
|
|
|
#include <sys/types.h>
#include <sys/dir.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <openssl/evp.h>
|
|
|
+
|
|
|
/* Boolean constants; TRUE is parenthesized so it expands safely anywhere. */
#define FALSE 0
#define TRUE (!FALSE)

/*
 * alphasort() is passed to scandir() as the ordering callback.  It is
 * declared with its full prototype (matching <dirent.h>) instead of an
 * empty parameter list, so the declaration stays valid when "()" means
 * "(void)".
 *
 * errno is deliberately not redeclared here: <errno.h> owns it and may
 * define it as a macro.
 */
extern int alphasort(const struct direct **, const struct direct **);
|
|
|
+
|
|
|
/*
 * One node of the singly linked list of scanned regular files.
 *
 * The struct carries the tag FILESTRUCT so that the `next` member really
 * points at this same type; both `FILESTRUCT *` and `struct FILESTRUCT *`
 * name it.
 */
typedef struct FILESTRUCT {
    char name[MAXPATHLEN];          /* directory + "/" + entry name */
    char short_name[FILENAME_MAX];  /* entry name without the directory */
    size_t size;                    /* st_size reported by stat() */
    char md5[32];                   /* not referenced anywhere in this file */
    int delete;                     /* non-zero once flagged as a duplicate */
    struct FILESTRUCT *next;        /* following node in the list */
} FILESTRUCT;
|
|
|
+
|
|
|
/* Set by -d: duplicates are unlinked instead of only being listed. */
int opt_delete = 0;
/* Set by find_dupes() as soon as one duplicate has been flagged. */
int is_dupes = 0;
/* Set by -r: add_directory() descends into subdirectories. */
int opt_recursive = 0;
/* Number of regular files added to the list. */
int total_scaned = 0;
/* Number of files flagged as duplicates. */
int total_deleted = 0;
|
|
|
+
|
|
|
+FILESTRUCT *file_first=NULL;
|
|
|
+FILESTRUCT *file_temp=NULL;
|
|
|
+FILESTRUCT *file_curr=NULL;
|
|
|
+
|
|
|
/*
 * Forward declarations for the functions defined further down.
 * Parameterless functions are declared with (void) so the compiler checks
 * calls against a real prototype.
 */
void usage(void);
int scan_dir(char *directory, int delete);
void delete_list(void);
void find_dupes(void);
void delete_dupes(void);
void print_list(void);
int check_md5(char *file1, char *file2, char *short1, char *short2);
void clear_buffer(char *buffer, int size);
void add_directory(char *dir_name);
|
|
|
+
|
|
|
+int main(int argc, char *argv[])
|
|
|
+{
|
|
|
+ char *opt_dir = NULL; /* Holds a name of start dir */
|
|
|
+ extern char *optarg;
|
|
|
+ extern int optind, optopt;
|
|
|
+ int err_flag=0, c;
|
|
|
+
|
|
|
+ if (argc == 1) {
|
|
|
+ opt_delete = 0;
|
|
|
+ } else {
|
|
|
+ while ((c = getopt(argc, argv, ":drf:")) != -1) {
|
|
|
+ switch (c) {
|
|
|
+ case 'd':
|
|
|
+ opt_delete = 1;
|
|
|
+ break;
|
|
|
+ case 'f':
|
|
|
+ opt_dir = optarg;
|
|
|
+ break;
|
|
|
+ case 'r':
|
|
|
+ opt_recursive = 1;
|
|
|
+ break;
|
|
|
+ case ':':
|
|
|
+ fprintf(stderr, "dufi: Option -%c requires an operand\n", optopt);
|
|
|
+ err_flag++;
|
|
|
+ break;
|
|
|
+ case '?':
|
|
|
+ fprintf(stderr, "dufi: Unrecognized -%c option\n", optopt);
|
|
|
+ err_flag++;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (err_flag) {
|
|
|
+ usage();
|
|
|
+ exit(2);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!opt_dir) {
|
|
|
+ opt_dir = getenv("PWD");
|
|
|
+ }
|
|
|
+
|
|
|
+ return scan_dir(opt_dir, opt_delete);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
/*
 * Print the command-line help text to standard output.
 */
void usage(void) {
    static const char *const help_lines[] = {
        "Dufi: deletes duplicate files.\n\n",
        "Usage:\n",
        "dufi [-d] [-r] [-f /path/to/directory]\n",
        "\t-d\tif specified, files will be deleted\n",
        "\t-r\ttraverse into subdirectories\n",
        "\t-f\tstarting directory\n"
    };
    size_t line_no;

    for (line_no = 0; line_no < sizeof help_lines / sizeof help_lines[0]; line_no++)
        printf("%s", help_lines[line_no]);
}
|
|
|
+
|
|
|
+void add_directory(char *dir_name) {
|
|
|
+ int res, count, i;
|
|
|
+ int file_select();
|
|
|
+ char full_name[MAXPATHLEN];
|
|
|
+ struct stat filestat;
|
|
|
+ struct direct **files;
|
|
|
+ FILESTRUCT *file_new;
|
|
|
+
|
|
|
+ res = chdir(dir_name);
|
|
|
+ if (res == -1) {
|
|
|
+ fprintf(stderr, "dufi: Could not open: %s\n", dir_name);
|
|
|
+ } else {
|
|
|
+
|
|
|
+ count = scandir(dir_name, &files, file_select, alphasort);
|
|
|
+
|
|
|
+ for (i=1; i<count+1; ++i) {
|
|
|
+ strcpy(full_name, dir_name);
|
|
|
+ strcat(full_name, "/");
|
|
|
+ strcat(full_name, files[i-1]->d_name);
|
|
|
+ stat(full_name, &filestat);
|
|
|
+ if (S_ISREG(filestat.st_mode)) { /* If it's a file */
|
|
|
+ total_scaned += 1;
|
|
|
+ /* Make a new space */
|
|
|
+ file_new = (FILESTRUCT *) malloc(sizeof(FILESTRUCT));
|
|
|
+ /* Put data into it */
|
|
|
+ strcpy(file_new->name, full_name);
|
|
|
+ strcpy(file_new->short_name, files[i-1]->d_name);
|
|
|
+ file_new->size = filestat.st_size;
|
|
|
+ file_new->delete = 0;
|
|
|
+ /* Make a link to previous */
|
|
|
+ if (file_curr) {
|
|
|
+ file_curr->next = (struct FILESTRUCT *) file_new;
|
|
|
+ file_curr = file_new;
|
|
|
+ } else {
|
|
|
+ file_first = file_new;
|
|
|
+ file_curr = file_new;
|
|
|
+ }
|
|
|
+ } else if (S_ISDIR(filestat.st_mode) && opt_recursive) { /* If it's a dir */
|
|
|
+ add_directory(full_name);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+int scan_dir(char *directory, int delete) {
|
|
|
+ add_directory(directory);
|
|
|
+ printf("\n");
|
|
|
+ find_dupes();
|
|
|
+ if (is_dupes)
|
|
|
+ delete_dupes();
|
|
|
+ else
|
|
|
+ printf("No duplicates.\n");
|
|
|
+ delete_list();
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
/*
 * Directory-entry filter: returns 0 for the "." and ".." entries and 1 for
 * every other name.
 */
int file_select(struct direct *entry) {
    const char *entry_name = entry->d_name;
    int is_self = (strcmp(entry_name, ".") == 0);
    int is_parent = (strcmp(entry_name, "..") == 0);

    return !(is_self || is_parent);
}
|
|
|
+
|
|
|
+
|
|
|
+void delete_list() {
|
|
|
+ FILESTRUCT *curr;
|
|
|
+ while (file_first->next) {
|
|
|
+ curr = file_first;
|
|
|
+ file_first = (FILESTRUCT *) file_first->next;
|
|
|
+ free(curr);
|
|
|
+ }
|
|
|
+ free(file_first);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+void print_list() {
|
|
|
+ file_temp = file_first;
|
|
|
+ while (file_temp != NULL) {
|
|
|
+ printf("Name: %s Size: %d Delete: ", file_temp->name, file_temp->size);
|
|
|
+ if(file_temp->delete)
|
|
|
+ printf("Y\n");
|
|
|
+ else
|
|
|
+ printf("N\n");
|
|
|
+ file_temp = (FILESTRUCT *) file_temp->next;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+void find_dupes() {
|
|
|
+ FILESTRUCT *new_first;
|
|
|
+ file_temp = file_first;
|
|
|
+ while (file_temp) {
|
|
|
+ if(!file_temp->delete) {
|
|
|
+ new_first = (FILESTRUCT *) file_temp->next;
|
|
|
+ while (new_first) {
|
|
|
+ if ((file_temp->size == new_first->size)
|
|
|
+ && check_md5(file_temp->name, new_first->name,
|
|
|
+ file_temp->short_name, new_first->short_name)) {
|
|
|
+ new_first->delete = 1;
|
|
|
+ is_dupes = 1;
|
|
|
+ total_deleted += 1;
|
|
|
+ }
|
|
|
+ new_first = (FILESTRUCT *) new_first->next;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ file_temp = (FILESTRUCT *) file_temp->next;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+void delete_dupes() {
|
|
|
+ file_temp = file_first;
|
|
|
+ size_t total_size = 0, total_kb, total_mb;
|
|
|
+ printf("\nTotal files scaned: %d\n", total_scaned);
|
|
|
+ if (total_deleted)
|
|
|
+ printf("Total duplicates: %d\n\n", total_deleted);
|
|
|
+ if (opt_delete && is_dupes)
|
|
|
+ printf("Deleting:\n");
|
|
|
+ else if (is_dupes)
|
|
|
+ printf("Would delete:\n");
|
|
|
+ while (file_temp) {
|
|
|
+ if (file_temp->delete) {
|
|
|
+ printf("%s\n", file_temp->name);
|
|
|
+ if (opt_delete) {
|
|
|
+ if (unlink(file_temp->name) == -1)
|
|
|
+ printf("\tCouldn't delete: %s\n", file_temp->name);
|
|
|
+ total_size += file_temp->size;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ file_temp = (FILESTRUCT *) file_temp->next;
|
|
|
+ }
|
|
|
+ if (opt_delete && is_dupes) {
|
|
|
+ total_kb = total_size/1024;
|
|
|
+ total_mb = total_kb/1024;
|
|
|
+ printf("\nTotal freed: ");
|
|
|
+ if (total_mb >= 1)
|
|
|
+ printf("%d Mb\n", total_mb);
|
|
|
+ else if (total_kb >= 1)
|
|
|
+ printf("%d Kb\n", total_kb);
|
|
|
+ else
|
|
|
+ printf("%d b\n", total_size);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+int check_md5(char *file1, char *file2, char *short1, char *short2) {
|
|
|
+ EVP_MD_CTX mdctx;
|
|
|
+ const EVP_MD *md;
|
|
|
+ unsigned char md_value1[EVP_MAX_MD_SIZE], md_value2[EVP_MAX_MD_SIZE];
|
|
|
+ int md_len, i, equals;
|
|
|
+ FILE *stream;
|
|
|
+ char buffer[2048];
|
|
|
+
|
|
|
+ printf("Found a match! Checking MD5 for: %s <==> %s ...", short1, short2);
|
|
|
+
|
|
|
+ clear_buffer(buffer, 2048);
|
|
|
+
|
|
|
+ OpenSSL_add_all_digests ();
|
|
|
+ md = EVP_get_digestbyname("md5");
|
|
|
+ EVP_MD_CTX_init (&mdctx);
|
|
|
+ EVP_DigestInit_ex (&mdctx, md, NULL);
|
|
|
+
|
|
|
+ if((stream = fopen(file1, "r")) == NULL) {
|
|
|
+ printf ("\nCant open file %s. Error is: %d\n", file1, errno);
|
|
|
+ exit(1);
|
|
|
+ }
|
|
|
+
|
|
|
+ while(fgets(buffer, 2048, stream)) {
|
|
|
+ EVP_DigestUpdate (&mdctx, buffer, 2048);
|
|
|
+ clear_buffer(buffer, 2048);
|
|
|
+ }
|
|
|
+
|
|
|
+ EVP_DigestFinal_ex (&mdctx, md_value1, &md_len);
|
|
|
+ EVP_MD_CTX_cleanup (&mdctx);
|
|
|
+ fclose(stream);
|
|
|
+
|
|
|
+ EVP_MD_CTX_init (&mdctx);
|
|
|
+ EVP_DigestInit_ex (&mdctx, md, NULL);
|
|
|
+
|
|
|
+ if((stream = fopen(file2, "r")) == NULL) {
|
|
|
+ printf ("\nCant open file %s. Error is: %d\n", file1, errno);
|
|
|
+ exit(1);
|
|
|
+ }
|
|
|
+
|
|
|
+ while(fgets(buffer, 2048, stream)) {
|
|
|
+ EVP_DigestUpdate (&mdctx, buffer, 2048);
|
|
|
+ clear_buffer(buffer, 2048);
|
|
|
+ }
|
|
|
+
|
|
|
+ EVP_DigestFinal_ex (&mdctx, md_value2, &md_len);
|
|
|
+ EVP_MD_CTX_cleanup (&mdctx);
|
|
|
+ fclose(stream);
|
|
|
+
|
|
|
+ equals = 1;
|
|
|
+ for (i = 0; i < md_len; i++)
|
|
|
+ if (md_value1[i] != md_value2[i])
|
|
|
+ equals=0;
|
|
|
+
|
|
|
+ if(equals) {
|
|
|
+ printf("MATCH!\n");
|
|
|
+ return 1;
|
|
|
+ } else {
|
|
|
+ printf("\n");
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
/*
 * Set the first `size` bytes of `buffer` to zero.  Nothing is written when
 * `size` is zero or negative.
 */
void clear_buffer(char *buffer, int size) {
    char *cursor = buffer;
    int remaining = size;

    while (remaining > 0) {
        *cursor++ = 0;
        remaining--;
    }
}
|
|
|
+
|