Browse Source

initial code

Nikola Kotur 6 years ago
commit
0086ccb883
2 changed files with 356 additions and 0 deletions
  1. 20 0
      Makefile
  2. 336 0
      dufi.c

+ 20 - 0
Makefile

@@ -0,0 +1,20 @@
+# Build rules for dufi, the duplicate file finder.
+CC = gcc
+CFLAGS = -g
+LDFLAGS =
+# dufi only calls the EVP digest functions, which are provided by libcrypto.
+LDLIBS = -lcrypto
+PREFIX = /usr/local/bin
+
+.PHONY: all clean rebuild install
+
+all: dufi
+
+dufi.o: dufi.c
+	$(CC) $(CFLAGS) -c dufi.c
+
+# Libraries are listed after the object file so the linker can resolve
+# the symbols that dufi.o leaves undefined.
+dufi: dufi.o
+	$(CC) $(LDFLAGS) -o dufi dufi.o $(LDLIBS)
+
+clean:
+	rm -f *.o dufi
+
+# There is no "build" target in this file; "all" produces the binary.
+rebuild: clean all
+
+install:
+	cp dufi $(PREFIX)

+ 336 - 0
dufi.c

@@ -0,0 +1,336 @@
+/*
+ * DUplicate FInd 0.1
+ *
+ * Desc: Finds duplicate files and deletes them.
+ * Author: Nikola Kotur <kotnik@ns-linux.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/dir.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <openssl/evp.h>
+
+/* Boolean constants; file_select() returns them to scandir(). */
+#define FALSE 0
+#define TRUE (!FALSE)
+
+/*
+ * NOTE(review): alphasort() is presumably already declared by the directory
+ * headers on current systems; this old-style declaration is kept in case it
+ * is not visible there.
+ */
+extern int alphasort();
+/* errno is provided by <errno.h> and must not be redeclared by the program. */
+
+/*
+ * One scanned regular file; nodes form a singly linked list through `next`.
+ *
+ * The struct carries the tag FILESTRUCT so that `struct FILESTRUCT *` is a
+ * pointer to this very type instead of to an unrelated incomplete type.
+ * Existing casts between `FILESTRUCT *` and `struct FILESTRUCT *` remain
+ * valid; they simply become no-ops.
+ */
+typedef struct FILESTRUCT {
+	char name[MAXPATHLEN];          /* directory path joined with the entry name */
+	char short_name[FILENAME_MAX];  /* entry name only, shown in progress output */
+	size_t size;                    /* st_size of the file */
+	char md5[32];                   /* not referenced anywhere in the visible code */
+	int delete;                     /* non-zero once flagged as a duplicate */
+	struct FILESTRUCT *next;        /* following node, NULL at the end of the list */
+} FILESTRUCT;
+
+/* Command-line switches. */
+int opt_delete = 0;      /* set to 1 by -d */
+int opt_recursive = 0;   /* set to 1 by -r */
+
+/* Scan state and counters. */
+int is_dupes = 0;        /* becomes 1 once any duplicate has been flagged */
+int total_scaned = 0;    /* regular files added to the list */
+int total_deleted = 0;   /* list entries flagged as duplicates */
+
+/* The file list: its head, a shared traversal cursor, and its tail. */
+FILESTRUCT *file_first = NULL;
+FILESTRUCT *file_temp = NULL;
+FILESTRUCT *file_curr = NULL;
+
+/* Functions defined further down in this file. */
+void usage(void);
+void add_directory(char *dir_name);
+int scan_dir(char *directory, int delete);
+void find_dupes();
+void delete_dupes();
+void print_list();
+void delete_list();
+int check_md5(char *file1, char *file2, char *short1, char *short2);
+void clear_buffer(char *buffer, int size);
+
+/*
+ * Program entry point: parses the command line and starts the scan.
+ *
+ * Options:
+ *   -d        delete the duplicates that are found
+ *   -r        descend into subdirectories
+ *   -f <dir>  directory to start from (defaults to the current directory)
+ *
+ * Returns whatever scan_dir() returns; exits with status 2 after printing
+ * the usage text when an option is unknown or lacks its operand.
+ */
+int main(int argc, char *argv[])
+{
+	static char cwd_buf[MAXPATHLEN]; /* Backing store for the default start dir */
+	char *opt_dir = NULL; /* Holds the name of the start dir */
+	extern char *optarg;
+	extern int optopt;
+	int err_flag = 0, c;
+
+	/* With no arguments getopt() returns -1 at once and every default stays. */
+	while ((c = getopt(argc, argv, ":drf:")) != -1) {
+		switch (c) {
+			case 'd':
+				opt_delete = 1;
+				break;
+			case 'f':
+				opt_dir = optarg;
+				break;
+			case 'r':
+				opt_recursive = 1;
+				break;
+			case ':':
+				fprintf(stderr, "dufi: Option -%c requires an operand\n", optopt);
+				err_flag++;
+				break;
+			case '?':
+				fprintf(stderr, "dufi: Unrecognized -%c option\n", optopt);
+				err_flag++;
+				break;
+			default:
+				break;
+		}
+	}
+	if (err_flag) {
+		usage();
+		exit(2);
+	}
+
+	if (!opt_dir) {
+		/*
+		 * The PWD environment variable may be unset or stale, which would
+		 * hand a NULL or wrong path to the scan.  Ask for the real working
+		 * directory and fall back to "." if even that fails.
+		 */
+		if (getcwd(cwd_buf, sizeof cwd_buf) != NULL)
+			opt_dir = cwd_buf;
+		else
+			opt_dir = ".";
+	}
+
+	return scan_dir(opt_dir, opt_delete);
+}
+
+
+/* Writes the command-line help text to standard output. */
+void usage(void) {
+	fputs("Dufi: deletes duplicate files.\n\n"
+	      "Usage:\n"
+	      "dufi [-d] [-r] [-f /path/to/directory]\n"
+	      "\t-d\tif specified, files will be deleted\n"
+	      "\t-r\ttraverse into subdirectories\n"
+	      "\t-f\tstarting directory\n",
+	      stdout);
+}
+
+/*
+ * Appends every regular file found in dir_name to the global file list
+ * (file_first .. file_curr) and increments total_scaned for each of them.
+ * When opt_recursive is set, subdirectories are scanned the same way.
+ *
+ * The directory is read through the path as given, without changing the
+ * working directory, so a relative start path keeps working while
+ * recursing.  Entries are examined with lstat(): symbolic links are neither
+ * followed nor listed, which keeps link cycles from recursing and keeps a
+ * link from being reported as a duplicate of its own target.
+ *
+ * An unreadable directory, an over-long path or a failing lstat() is
+ * reported on stderr and skipped; running out of memory ends the program.
+ */
+void add_directory(char *dir_name) {
+	int count, i, len;
+	int file_select();
+	char full_name[MAXPATHLEN];
+	struct stat filestat;
+	struct direct **files = NULL;
+	FILESTRUCT *file_new;
+
+	count = scandir(dir_name, &files, file_select, alphasort);
+	if (count < 0) {
+		fprintf(stderr, "dufi: Could not open: %s\n", dir_name);
+		return;
+	}
+
+	for (i = 0; i < count; ++i) {
+		len = snprintf(full_name, sizeof full_name, "%s/%s",
+				dir_name, files[i]->d_name);
+		if (len < 0 || (size_t) len >= sizeof full_name) {
+			fprintf(stderr, "dufi: Path too long, skipping: %s/%s\n",
+					dir_name, files[i]->d_name);
+		} else if (lstat(full_name, &filestat) == -1) {
+			fprintf(stderr, "dufi: Could not stat: %s\n", full_name);
+		} else if (S_ISREG(filestat.st_mode)) { /* If it's a file */
+			file_new = malloc(sizeof *file_new);
+			if (file_new == NULL) {
+				fprintf(stderr, "dufi: Out of memory\n");
+				exit(1);
+			}
+			total_scaned += 1;
+			/* Put data into the new node */
+			snprintf(file_new->name, sizeof file_new->name, "%s", full_name);
+			snprintf(file_new->short_name, sizeof file_new->short_name,
+					"%s", files[i]->d_name);
+			file_new->size = filestat.st_size;
+			file_new->delete = 0;
+			/* Every list walk stops at a NULL next, so the tail needs one. */
+			file_new->next = NULL;
+			/* Link it behind the current tail, or start the list */
+			if (file_curr)
+				file_curr->next = (struct FILESTRUCT *) file_new;
+			else
+				file_first = file_new;
+			file_curr = file_new;
+		} else if (S_ISDIR(filestat.st_mode) && opt_recursive) { /* If it's a dir */
+			add_directory(full_name);
+		}
+		/* scandir() allocates each entry separately. */
+		free(files[i]);
+	}
+	free(files);
+}
+
+/*
+ * Runs one complete pass over `directory`: builds the file list, flags the
+ * duplicates, reports them through delete_dupes() when there are any, and
+ * finally releases the list.
+ *
+ * The `delete` argument is not consulted in this function.
+ * Always returns 0.
+ */
+int scan_dir(char *directory, int delete) {
+	add_directory(directory);
+	fputs("\n", stdout);
+	find_dupes();
+
+	if (!is_dupes) {
+		fputs("No duplicates.\n", stdout);
+	} else {
+		delete_dupes();
+	}
+
+	delete_list();
+	return 0;
+}
+
+
+/*
+ * scandir() filter: rejects the "." and ".." entries, accepts all others.
+ *
+ * The parameter is const-qualified so the function has the filter type that
+ * scandir() expects.  Returns 1 to keep the entry and 0 to drop it.
+ */
+int file_select(const struct direct *entry) {
+	const char *name = entry->d_name;
+
+	/* Plain character tests, so no string-library call is needed. */
+	if (name[0] == '.' &&
+		(name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
+		return 0;
+
+	return 1;
+}
+
+
+/*
+ * Frees every node of the global file list.
+ *
+ * Safe to call when the list is empty (file_first == NULL).  Afterwards
+ * file_first, file_curr and file_temp are all NULL, so none of the list
+ * globals is left pointing at freed memory.
+ */
+void delete_list() {
+	FILESTRUCT *curr;
+
+	while (file_first) {
+		curr = file_first;
+		file_first = (FILESTRUCT *) file_first->next;
+		free(curr);
+	}
+	file_curr = NULL;
+	file_temp = NULL;
+}
+
+
+/*
+ * Prints name, size and delete flag of every entry in the file list, one
+ * entry per line.  It is not called from the visible code.
+ * Uses the global file_temp as its cursor, which is NULL on return.
+ */
+void print_list() {
+	file_temp = file_first;
+	while (file_temp != NULL) {
+		/* size is a size_t, which needs %zu; %d is undefined behaviour here. */
+		printf("Name: %s   Size: %zu   Delete: ", file_temp->name, file_temp->size);
+		if (file_temp->delete)
+			printf("Y\n");
+		else
+			printf("N\n");
+		file_temp = (FILESTRUCT *) file_temp->next;
+	}
+}
+
+
+/*
+ * Flags duplicate entries in the file list.
+ *
+ * Each entry that is not itself flagged is compared with every entry behind
+ * it: first by size, then (only if the sizes agree) through check_md5().
+ * A matching later entry gets delete = 1, is_dupes is set and
+ * total_deleted is incremented.
+ *
+ * Candidates that are already flagged are skipped, so a file known to be a
+ * duplicate is not hashed again against unrelated entries of equal size.
+ * Uses the global file_temp as the outer cursor, which is NULL on return.
+ */
+void find_dupes() {
+	FILESTRUCT *candidate;
+
+	file_temp = file_first;
+	while (file_temp) {
+		if (!file_temp->delete) {
+			candidate = (FILESTRUCT *) file_temp->next;
+			while (candidate) {
+				if (!candidate->delete
+						&& (file_temp->size == candidate->size)
+						&& check_md5(file_temp->name, candidate->name,
+							file_temp->short_name, candidate->short_name)) {
+					candidate->delete = 1;
+					is_dupes = 1;
+					total_deleted += 1;
+				}
+				candidate = (FILESTRUCT *) candidate->next;
+			}
+		}
+		file_temp = (FILESTRUCT *) file_temp->next;
+	}
+}
+
+
+/*
+ * Prints the scan summary and lists every entry flagged as a duplicate.
+ *
+ * With opt_delete set, each flagged file is unlinked and the amount of
+ * space freed is printed at the end; only files that were actually removed
+ * count towards that total.  Without opt_delete the files are only listed.
+ * Uses the global file_temp as its cursor, which is NULL on return.
+ */
+void delete_dupes() {
+	file_temp = file_first;
+	size_t total_size = 0, total_kb, total_mb;
+
+	printf("\nTotal files scaned: %d\n", total_scaned);
+	if (total_deleted)
+		printf("Total duplicates: %d\n\n", total_deleted);
+	if (opt_delete && is_dupes)
+		printf("Deleting:\n");
+	else if (is_dupes)
+		printf("Would delete:\n");
+
+	while (file_temp) {
+		if (file_temp->delete) {
+			printf("%s\n", file_temp->name);
+			if (opt_delete) {
+				if (unlink(file_temp->name) == -1)
+					printf("\tCouldn't delete: %s\n", file_temp->name);
+				else
+					total_size += file_temp->size;
+			}
+		}
+		file_temp = (FILESTRUCT *) file_temp->next;
+	}
+
+	if (opt_delete && is_dupes) {
+		total_kb = total_size/1024;
+		total_mb = total_kb/1024;
+		printf("\nTotal freed: ");
+		/* The totals are size_t values, which need %zu rather than %d. */
+		if (total_mb >= 1)
+			printf("%zu Mb\n", total_mb);
+		else if (total_kb >= 1)
+			printf("%zu Kb\n", total_kb);
+		else
+			printf("%zu b\n", total_size);
+	}
+}
+
+
+/*
+ * Computes the MD5 digest of the file at `path`.
+ *
+ * The file is read in binary mode and exactly the bytes read are hashed.
+ * `digest` must have room for EVP_MAX_MD_SIZE bytes; the digest length is
+ * stored in `*digest_len`.
+ *
+ * Returns 1 on success.  Returns 0 after printing a message when the file
+ * cannot be opened or read, or when a digest call fails.
+ */
+static int md5_of_file(const char *path, unsigned char *digest,
+		unsigned int *digest_len) {
+	unsigned char chunk[2048];
+	size_t got;
+	int ok = 0;
+	FILE *stream;
+	EVP_MD_CTX *ctx;
+
+	if ((stream = fopen(path, "rb")) == NULL) {
+		printf("\nCant open file %s. Error is: %d\n", path, errno);
+		return 0;
+	}
+
+	/* The context is heap-allocated: newer OpenSSL makes the struct opaque. */
+	ctx = EVP_MD_CTX_create();
+	if (ctx == NULL)
+		goto out_close;
+	if (EVP_DigestInit_ex(ctx, EVP_md5(), NULL) != 1)
+		goto out_ctx;
+
+	while ((got = fread(chunk, 1, sizeof chunk, stream)) > 0) {
+		if (EVP_DigestUpdate(ctx, chunk, got) != 1)
+			goto out_ctx;
+	}
+	if (ferror(stream))
+		goto out_ctx;
+
+	ok = (EVP_DigestFinal_ex(ctx, digest, digest_len) == 1);
+
+out_ctx:
+	EVP_MD_CTX_destroy(ctx);
+out_close:
+	fclose(stream);
+	if (!ok)
+		fprintf(stderr, "\ndufi: Could not compute MD5 of: %s\n", path);
+	return ok;
+}
+
+/*
+ * Tells whether two files have the same MD5 digest.
+ *
+ * file1, file2:   paths of the files to compare
+ * short1, short2: names shown in the progress line
+ *
+ * Returns 1 when both digests could be computed and are identical, and 0
+ * otherwise.  A file that cannot be opened or hashed makes the pair count
+ * as "not duplicates" instead of ending the program, so one unreadable
+ * file neither aborts the scan nor gets anything deleted.
+ */
+int check_md5(char *file1, char *file2, char *short1, char *short2) {
+	unsigned char md_value1[EVP_MAX_MD_SIZE], md_value2[EVP_MAX_MD_SIZE];
+	unsigned int md_len1 = 0, md_len2 = 0;
+
+	printf("Found a match! Checking MD5 for: %s <==> %s ...", short1, short2);
+
+	if (md5_of_file(file1, md_value1, &md_len1)
+			&& md5_of_file(file2, md_value2, &md_len2)
+			&& md_len1 == md_len2
+			&& memcmp(md_value1, md_value2, md_len1) == 0) {
+		printf("MATCH!\n");
+		return 1;
+	}
+
+	printf("\n");
+	return 0;
+}
+
+
+/*
+ * Sets the first `size` bytes of `buffer` to zero.
+ * Nothing is written when `size` is zero or negative.
+ */
+void clear_buffer(char *buffer, int size) {
+	char *cursor = buffer;
+	int remaining;
+
+	for (remaining = size; remaining > 0; --remaining) {
+		*cursor = 0;
+		++cursor;
+	}
+}
+