Saturday, December 8, 2018

codingdirectional: Compare and delete files with the same content with python

Welcome back to this new chapter of the delete duplicate application project. In the previous chapter this Python program successfully deleted the duplicate files inside the nested folders; however, it did not really delete the files with the same content, but instead just deleted any file with the same name as the selected one. Thus in this chapter we are going one step further: we will delete only the files with the same content and leave those that merely share the same name alone. First of all, we will pass the full path of the selected file as the fourth parameter when we create a new Remove thread instance.
from tkinter import *
from tkinter import filedialog
from Remove import Remove

# Main application window: titled, fixed-size, black background.
win = Tk()
win.title("Multitas")
win.resizable(False, False)  # no resizing horizontally or vertically
win.configure(background='black')

# Caption label shown in grid row 1; white text on black.
# It is also handed to the Remove thread by selectFile.
aLabel = Label(
    win,
    text="Remove duplicate",
    anchor="center",
    foreground="white",
    background="black",
)
aLabel.grid(column=0, row=1)

# 6 Create a selectFile function to be used by button
def selectFile():
    """Ask for a reference file and a folder, then start a Remove thread.

    The user first picks the file whose duplicates should be removed and
    then the folder to search. If either dialog is cancelled nothing
    happens. Otherwise a ``Remove`` thread is created with the folder, the
    label, the file's base name and the file's full path, and is started.
    """
    # Local import: this script has no module-level ``import os``.
    import os

    fullfilename = filedialog.askopenfilename(initialdir="/", title="Select file")
    # A cancelled dialog yields an empty value ('' or, on some Tk builds,
    # an empty tuple), so test truthiness rather than comparing with ''.
    if not fullfilename:
        return

    filename = os.path.basename(fullfilename)

    folder = filedialog.askdirectory()
    if not folder:
        return

    # normpath converts '/' to the native separator on Windows and leaves
    # the path intact elsewhere; a blind replace('/', '\\') would produce a
    # non-existent path on non-Windows systems.
    folder = os.path.normpath(folder)
    remove = Remove(folder, aLabel, filename, fullfilename)
    remove.start()

# 8 Adding a Button
# Button in grid row 0 that runs selectFile when clicked;
# white text on a brown background.
action = Button(
    win,
    text="Select File",
    command=selectFile,
    background='brown',
    foreground='white',
)
action.grid(column=0, row=0)

# Enter the Tk event loop to start the GUI.
win.mainloop()
Next we will modify the Remove thread class by introducing the filecmp module, which will be used to compare the content of two files. If a file has the same content as the selected file then it will get deleted; otherwise it won’t get deleted, even if it has the same name as the selected file.
import threading
import os
import filecmp

class Remove(threading.Thread):
    """Thread that deletes copies of a selected file found under a folder.

    A file is deleted when it has the same extension as the selected file
    and byte-for-byte identical content. The selected file itself is never
    deleted, even when it lives inside the folder being scanned.
    """

    def __init__(self, massage, aLabel, filename, fullfilename):
        """Store the scan parameters.

        Args:
            massage: Path of the folder to scan (sub-folders included).
            aLabel: Label widget kept on ``self.label``; not used by the
                methods of this class.
            filename: Base name of the selected file; split into
                ``self.filename`` and ``self.file_extension``.
            fullfilename: Full path of the selected file, used as the
                reference for content comparison.
        """
        super().__init__()
        self.massage = massage
        self.label = aLabel
        self.filename, self.file_extension = os.path.splitext(filename)
        self.fullfilename = fullfilename

    def run(self):
        """Thread entry point: scan ``self.massage`` recursively."""
        self.delete_duplicate(self.massage)

    def delete_duplicate(self, folder):
        """Recursively scan *folder* and remove duplicates of the selected file.

        Paths are built with ``os.path.join`` instead of ``os.chdir`` so the
        working directory, which is shared by every thread in the process,
        is left untouched.
        """
        for entry in os.listdir(folder):
            path = os.path.join(folder, entry)
            if os.path.isfile(path):
                _, file_extension = os.path.splitext(entry)
                self.remove_file(file_extension, path)
            elif os.path.isdir(path):
                # Only real directories are descended into; broken symlinks
                # and special files are skipped instead of crashing listdir.
                self.delete_duplicate(path)

    def remove_file(self, file_extension, filepath):
        """Delete *filepath* if it duplicates the selected file.

        Args:
            file_extension: Extension of *filepath* (including the dot).
            filepath: Path of the candidate file.
        """
        if file_extension != self.file_extension:
            return
        if self._is_selected_file(filepath):
            # The reference file always compares equal to itself; removing
            # it would destroy the original rather than a duplicate.
            return
        if filecmp.cmp(filepath, self.fullfilename, shallow=False):
            os.remove(filepath)

    def _is_selected_file(self, filepath):
        """Return True when *filepath* refers to the selected file itself."""
        try:
            return os.path.samefile(filepath, self.fullfilename)
        except OSError:
            # Identity could not be determined; let the content comparison
            # decide (and report any real error) as before.
            return False
The Python program above runs successfully without any problem. With that, we can come back to the Remove thread class in the next chapter and modify it further so that it can handle a heavier workload, such as searching for several different types of duplicate files at the same time! If you enjoy this tutorial, make sure you subscribe to the RSS feed of this website.

from Planet Python
via read more

No comments:

Post a Comment

TestDriven.io: Working with Static and Media Files in Django

This article looks at how to work with static and media files in a Django project, locally and in production. from Planet Python via read...