dufi.c 7.5 KB


  1. /*
  2. * DUplicate FInd 0.1
  3. *
  4. * Desc: Finds duplicate files and deletes them.
  5. * Author: Nikola Kotur <kotnik@ns-linux.org>
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20. *
  21. */
  #include <sys/types.h>
  #include <sys/dir.h>
  #include <sys/param.h>
  #include <sys/stat.h>

  #include <dirent.h>
  #include <errno.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>

  #include <openssl/evp.h>
  /*
   * Boolean constants used by the directory-entry filter.
   * TRUE is parenthesized so it expands safely inside larger expressions.
   */
  #define FALSE 0
  #define TRUE (!FALSE)

  /*
   * No manual `extern` declarations for errno or alphasort: errno comes from
   * <errno.h> (where it is typically a macro, so redeclaring it is not
   * portable) and alphasort is declared, with a real prototype, by the
   * directory headers included above.
   */
  /*
   * One scanned regular file; nodes form a singly linked list.
   *
   * The struct carries the tag FILESTRUCT so that `next` really points to
   * this type. Without the tag, `struct FILESTRUCT *` names a separate
   * incomplete type and every traversal needs a cast. Existing casts to
   * `struct FILESTRUCT *` / `FILESTRUCT *` remain valid.
   */
  typedef struct FILESTRUCT {
      char name[MAXPATHLEN];          /* path built as "<directory>/<entry>" */
      char short_name[FILENAME_MAX];  /* directory entry name only */
      size_t size;                    /* st_size reported by stat() */
      char md5[32];                   /* not read or written by the code in this file */
      int delete;                     /* non-zero once flagged as a duplicate */
      struct FILESTRUCT *next;        /* following node, NULL at the tail */
  } FILESTRUCT;
  43. int opt_delete=0;
  44. int is_dupes=0;
  45. int opt_recursive=0;
  46. int total_scaned=0, total_deleted=0;
  47. FILESTRUCT *file_first=NULL;
  48. FILESTRUCT *file_temp=NULL;
  49. FILESTRUCT *file_curr=NULL;
  /*
   * Forward declarations.
   *
   * Functions without parameters are declared with (void) so the compiler
   * checks calls against a real prototype; an empty () declares no prototype
   * in C11 and earlier. Parameter types are unchanged.
   */
  void usage(void);
  int scan_dir(char *directory, int delete);
  void delete_list(void);
  void find_dupes(void);
  void delete_dupes(void);
  void print_list(void);
  int check_md5(char *file1, char *file2, char *short1, char *short2);
  void clear_buffer(char *buffer, int size);
  void add_directory(char *dir_name);
  59. int main(int argc, char *argv[])
  60. {
  61. char *opt_dir = NULL; /* Holds a name of start dir */
  62. extern char *optarg;
  63. extern int optind, optopt;
  64. int err_flag=0, c;
  65. if (argc == 1) {
  66. opt_delete = 0;
  67. } else {
  68. while ((c = getopt(argc, argv, ":drf:")) != -1) {
  69. switch (c) {
  70. case 'd':
  71. opt_delete = 1;
  72. break;
  73. case 'f':
  74. opt_dir = optarg;
  75. break;
  76. case 'r':
  77. opt_recursive = 1;
  78. break;
  79. case ':':
  80. fprintf(stderr, "dufi: Option -%c requires an operand\n", optopt);
  81. err_flag++;
  82. break;
  83. case '?':
  84. fprintf(stderr, "dufi: Unrecognized -%c option\n", optopt);
  85. err_flag++;
  86. break;
  87. }
  88. }
  89. if (err_flag) {
  90. usage();
  91. exit(2);
  92. }
  93. }
  94. if (!opt_dir) {
  95. opt_dir = getenv("PWD");
  96. }
  97. return scan_dir(opt_dir, opt_delete);
  98. }
  /* Write the command-line help text to standard output. */
  void usage(void)
  {
      static const char *const help_lines[] = {
          "Dufi: deletes duplicate files.\n\n",
          "Usage:\n",
          "dufi [-d] [-r] [-f /path/to/directory]\n",
          "\t-d\tif specified, files will be deleted\n",
          "\t-r\ttraverse into subdirectories\n",
          "\t-f\tstarting directory\n",
      };
      size_t idx;

      for (idx = 0; idx < sizeof help_lines / sizeof help_lines[0]; idx++) {
          fputs(help_lines[idx], stdout);
      }
  }
  107. void add_directory(char *dir_name) {
  108. int res, count, i;
  109. int file_select();
  110. char full_name[MAXPATHLEN];
  111. struct stat filestat;
  112. struct direct **files;
  113. FILESTRUCT *file_new;
  114. res = chdir(dir_name);
  115. if (res == -1) {
  116. fprintf(stderr, "dufi: Could not open: %s\n", dir_name);
  117. } else {
  118. count = scandir(dir_name, &files, file_select, alphasort);
  119. for (i=1; i<count+1; ++i) {
  120. strcpy(full_name, dir_name);
  121. strcat(full_name, "/");
  122. strcat(full_name, files[i-1]->d_name);
  123. stat(full_name, &filestat);
  124. if (S_ISREG(filestat.st_mode)) { /* If it's a file */
  125. total_scaned += 1;
  126. /* Make a new space */
  127. file_new = (FILESTRUCT *) malloc(sizeof(FILESTRUCT));
  128. /* Put data into it */
  129. strcpy(file_new->name, full_name);
  130. strcpy(file_new->short_name, files[i-1]->d_name);
  131. file_new->size = filestat.st_size;
  132. file_new->delete = 0;
  133. /* Make a link to previous */
  134. if (file_curr) {
  135. file_curr->next = (struct FILESTRUCT *) file_new;
  136. file_curr = file_new;
  137. } else {
  138. file_first = file_new;
  139. file_curr = file_new;
  140. }
  141. } else if (S_ISDIR(filestat.st_mode) && opt_recursive) { /* If it's a dir */
  142. add_directory(full_name);
  143. }
  144. }
  145. }
  146. }
  147. int scan_dir(char *directory, int delete) {
  148. add_directory(directory);
  149. printf("\n");
  150. find_dupes();
  151. if (is_dupes)
  152. delete_dupes();
  153. else
  154. printf("No duplicates.\n");
  155. delete_list();
  156. return 0;
  157. }
  /*
   * scandir() filter: keep every directory entry except "." and "..".
   *
   * entry  directory entry under consideration; taken as a pointer to const
   *        struct dirent, which is the parameter type scandir() uses for its
   *        filter callback.
   *
   * Returns 1 to keep the entry, 0 to drop it.
   */
  int file_select(const struct dirent *entry)
  {
      return strcmp(entry->d_name, ".") != 0
          && strcmp(entry->d_name, "..") != 0;
  }
  165. void delete_list() {
  166. FILESTRUCT *curr;
  167. while (file_first->next) {
  168. curr = file_first;
  169. file_first = (FILESTRUCT *) file_first->next;
  170. free(curr);
  171. }
  172. free(file_first);
  173. }
  174. void print_list() {
  175. file_temp = file_first;
  176. while (file_temp != NULL) {
  177. printf("Name: %s Size: %d Delete: ", file_temp->name, file_temp->size);
  178. if(file_temp->delete)
  179. printf("Y\n");
  180. else
  181. printf("N\n");
  182. file_temp = (FILESTRUCT *) file_temp->next;
  183. }
  184. }
  185. void find_dupes() {
  186. FILESTRUCT *new_first;
  187. file_temp = file_first;
  188. while (file_temp) {
  189. if(!file_temp->delete) {
  190. new_first = (FILESTRUCT *) file_temp->next;
  191. while (new_first) {
  192. if ((file_temp->size == new_first->size)
  193. && check_md5(file_temp->name, new_first->name,
  194. file_temp->short_name, new_first->short_name)) {
  195. new_first->delete = 1;
  196. is_dupes = 1;
  197. total_deleted += 1;
  198. }
  199. new_first = (FILESTRUCT *) new_first->next;
  200. }
  201. }
  202. file_temp = (FILESTRUCT *) file_temp->next;
  203. }
  204. }
  205. void delete_dupes() {
  206. file_temp = file_first;
  207. size_t total_size = 0, total_kb, total_mb;
  208. printf("\nTotal files scaned: %d\n", total_scaned);
  209. if (total_deleted)
  210. printf("Total duplicates: %d\n\n", total_deleted);
  211. if (opt_delete && is_dupes)
  212. printf("Deleting:\n");
  213. else if (is_dupes)
  214. printf("Would delete:\n");
  215. while (file_temp) {
  216. if (file_temp->delete) {
  217. printf("%s\n", file_temp->name);
  218. if (opt_delete) {
  219. if (unlink(file_temp->name) == -1)
  220. printf("\tCouldn't delete: %s\n", file_temp->name);
  221. total_size += file_temp->size;
  222. }
  223. }
  224. file_temp = (FILESTRUCT *) file_temp->next;
  225. }
  226. if (opt_delete && is_dupes) {
  227. total_kb = total_size/1024;
  228. total_mb = total_kb/1024;
  229. printf("\nTotal freed: ");
  230. if (total_mb >= 1)
  231. printf("%d Mb\n", total_mb);
  232. else if (total_kb >= 1)
  233. printf("%d Kb\n", total_kb);
  234. else
  235. printf("%d b\n", total_size);
  236. }
  237. }
  238. int check_md5(char *file1, char *file2, char *short1, char *short2) {
  239. EVP_MD_CTX mdctx;
  240. const EVP_MD *md;
  241. unsigned char md_value1[EVP_MAX_MD_SIZE], md_value2[EVP_MAX_MD_SIZE];
  242. int md_len, i, equals;
  243. FILE *stream;
  244. char buffer[2048];
  245. printf("Found a match! Checking MD5 for: %s <==> %s ...", short1, short2);
  246. clear_buffer(buffer, 2048);
  247. OpenSSL_add_all_digests ();
  248. md = EVP_get_digestbyname("md5");
  249. EVP_MD_CTX_init (&mdctx);
  250. EVP_DigestInit_ex (&mdctx, md, NULL);
  251. if((stream = fopen(file1, "r")) == NULL) {
  252. printf ("\nCant open file %s. Error is: %d\n", file1, errno);
  253. exit(1);
  254. }
  255. while(fgets(buffer, 2048, stream)) {
  256. EVP_DigestUpdate (&mdctx, buffer, 2048);
  257. clear_buffer(buffer, 2048);
  258. }
  259. EVP_DigestFinal_ex (&mdctx, md_value1, &md_len);
  260. EVP_MD_CTX_cleanup (&mdctx);
  261. fclose(stream);
  262. EVP_MD_CTX_init (&mdctx);
  263. EVP_DigestInit_ex (&mdctx, md, NULL);
  264. if((stream = fopen(file2, "r")) == NULL) {
  265. printf ("\nCant open file %s. Error is: %d\n", file1, errno);
  266. exit(1);
  267. }
  268. while(fgets(buffer, 2048, stream)) {
  269. EVP_DigestUpdate (&mdctx, buffer, 2048);
  270. clear_buffer(buffer, 2048);
  271. }
  272. EVP_DigestFinal_ex (&mdctx, md_value2, &md_len);
  273. EVP_MD_CTX_cleanup (&mdctx);
  274. fclose(stream);
  275. equals = 1;
  276. for (i = 0; i < md_len; i++)
  277. if (md_value1[i] != md_value2[i])
  278. equals=0;
  279. if(equals) {
  280. printf("MATCH!\n");
  281. return 1;
  282. } else {
  283. printf("\n");
  284. return 0;
  285. }
  286. }
  /*
   * Set the first `size` bytes of `buffer` to zero.
   *
   * buffer  memory to clear; a NULL pointer is ignored
   * size    number of bytes to clear; zero or negative values clear nothing
   */
  void clear_buffer(char *buffer, int size)
  {
      if (buffer == NULL || size <= 0) {
          return;
      }
      memset(buffer, 0, (size_t) size);
  }