Commit 35d25c94 by kollo

new: dupdel.bas

parent d88830f1
#!/usr/bin/xbasic
' delete duplicate files or replace them with symbolic links
'
' (c) by Markus Hoffmann 2017
'
' ---------------------------------------------------------------
' Command line parsing.
' Walks PARAM$(1), PARAM$(2), ... until an empty parameter is found.
' Arguments starting with "-" are treated as options; options that
' take a value (--master, --slave, -m, -o) consume the following
' parameter. Anything else is remembered in inputfile$.
' ---------------------------------------------------------------
i=1
verbose=0
minlen=0            ! minimal file size; 0 means "no lower limit"
recursive=FALSE
dokill=FALSE
interactive=FALSE
masterpath$=""
slavepath$=""
WHILE LEN(PARAM$(i))
  IF LEFT$(PARAM$(i))="-"
    IF PARAM$(i)="--help" OR PARAM$(i)="-h"
      @intro
      @using
      ' Stop here: without this the script would go on and complain
      ' about missing --master/--slave paths right after the help text.
      QUIT
    ELSE IF PARAM$(i)="--version"
      @intro
      QUIT
    ELSE IF PARAM$(i)="--interactive" OR PARAM$(i)="-i"
      interactive=TRUE
    ELSE IF PARAM$(i)="--recursive" OR PARAM$(i)="-r"
      recursive=TRUE
    ELSE IF PARAM$(i)="--verbose" OR PARAM$(i)="-v"
      ' May be given several times; each occurrence raises the level.
      INC verbose
    ELSE IF PARAM$(i)="--doit" OR PARAM$(i)="-k"
      dokill=TRUE
    ELSE IF PARAM$(i)="--master"
      INC i
      IF LEN(PARAM$(i))
        masterpath$=PARAM$(i)
      ENDIF
    ELSE IF PARAM$(i)="--slave"
      INC i
      IF LEN(PARAM$(i))
        slavepath$=PARAM$(i)
      ENDIF
    ELSE IF PARAM$(i)="-m"
      INC i
      IF LEN(PARAM$(i))
        minlen=VAL(PARAM$(i))
      ENDIF
    ELSE IF PARAM$(i)="-o"
      INC i
      IF LEN(PARAM$(i))
        outputfilename$=PARAM$(i)
      ENDIF
    ELSE
      ' Unknown options are only collected, not rejected.
      collect$=collect$+PARAM$(i)+" "
    ENDIF
  ELSE
    inputfile$=PARAM$(i)
  ENDIF
  INC i
WEND
' ---------------------------------------------------------------
' Sanity checks on the two root paths before anything is scanned.
' Both paths must exist, and they must not denote the same
' directory (compared by name and by inode).
' ---------------------------------------------------------------
IF NOT EXIST(masterpath$) OR NOT EXIST(slavepath$)
  PRINT "You need to specify two paths with --master and --slave"
  PRINT "Abort."
  QUIT
ENDIF
IF masterpath$=slavepath$ OR inode(masterpath$)=inode(slavepath$)
  PRINT "Master and Slave paths are the same!"
  PRINT "Abort."
  QUIT
ENDIF
' ---------------------------------------------------------------
' Scan the master tree and then the slave tree.
' @stepdir fills the globals files$(), filelen(), anzfiles and
' anzdir. After each scan the result is sorted by file size and
' copied into masterfile$()/masterlen() resp. slavefile$()/slavelen().
' The comparison stage further down relies on both lists being
' ordered by size.
' ---------------------------------------------------------------
mask$="*"
DIM masterfile$(200000)
DIM slavefile$(200000)
DIM masterlen(200000)
DIM slavelen(200000)
DIM mastermd5$(200000)
DIM slavemd5$(200000)
anzmasterfiles=0
anzslavefiles=0

PRINT "Master: ";masterpath$;" --> ";
FLUSH
@stepdir(masterpath$)
' Everything that touches the index array is skipped for an empty
' scan, so neither DIM, the loops nor SORT ever see a zero count.
IF anzfiles>0
  ' u%() starts as the identity permutation and is passed to SORT
  ' together with the sizes; afterwards u%(i) is used as the
  ' original index of the file belonging to filelen(i).
  DIM u%(anzfiles)
  FOR i=0 TO anzfiles-1
    u%(i)=i
  NEXT i
  SORT filelen(),anzfiles,u%()
  FOR i=0 TO anzfiles-1
    masterfile$(i)=files$(u%(i))
    masterlen(i)=filelen(i)
  NEXT i
ENDIF
PRINT "Found ";anzfiles;" files in ";anzdir;" directories."
anzmasterfiles=anzfiles

PRINT "Slave: ";slavepath$;" --> ";
FLUSH
@stepdir(slavepath$)
IF anzfiles>0
  DIM u%(anzfiles)
  FOR i=0 TO anzfiles-1
    u%(i)=i
  NEXT i
  SORT filelen(),anzfiles,u%()
  FOR i=0 TO anzfiles-1
    slavefile$(i)=files$(u%(i))
    slavelen(i)=filelen(i)
  NEXT i
ENDIF
PRINT "Found ";anzfiles;" files in ";anzdir;" directories."
anzslavefiles=anzfiles
' ---------------------------------------------------------------
' Compare stage.
' For every slave file, collect all master files of exactly the
' same size (both lists are ordered by size, so the master list is
' only ever walked forward), then confirm real duplicates by
' comparing md5sum output. Depending on the mode the duplicate in
' the slave tree is reported, deleted, or replaced by a symbolic
' link to the master copy.
'
' NOTE(review): file names are handed to the shell wrapped by
' enclose$() only. Names containing quote, "$" or backtick
' characters may not survive that - confirm what enclose$()
' escapes before running this on untrusted trees.
' ---------------------------------------------------------------
PRINT "compare..."
maxcan=10000                 ! capacity of candidate$()
DIM candidate$(maxcan)
lastidx=0
startidx=0
startidx2=0
IF minlen>0
  ' Skip all files below the requested minimum size. The index is
  ' bounded so that the seek stops at the end of the list when
  ' every file is smaller than minlen.
  WHILE startidx<anzslavefiles
    EXIT IF slavelen(startidx)>=minlen
    INC startidx
  WEND
  WHILE startidx2<anzmasterfiles
    EXIT IF masterlen(startidx2)>=minlen
    INC startidx2
  WEND
  PRINT "seek forward --> ";startidx;"/";startidx2
ENDIF
lastidx=startidx2
' Pre-tested loops: nothing is processed when there are no slave
' (or no remaining master) files.
i=startidx
WHILE i<anzslavefiles
  IF verbose>1
    PRINT "Try: ";slavefile$(i);" ";
    PRINT "last idx=";lastidx;"/";anzmasterfiles
  ENDIF
  anzcan=0
  anzmatch=0
  j=lastidx
  WHILE j<anzmasterfiles
    ' Master sizes only grow from here on, so stop at the first larger one.
    EXIT IF masterlen(j)>slavelen(i)
    IF masterlen(j)=slavelen(i)
      IF anzcan=0
        ' Remember where this size starts; the next slave file
        ' (same or larger size) resumes the search from here.
        lastidx=j
      ENDIF
      ' Never write past the end of candidate$().
      IF anzcan<maxcan
        candidate$(anzcan)=masterfile$(j)
        INC anzcan
      ENDIF
    ENDIF
    INC j
  WEND
  IF anzcan
    t$=system$("md5sum "+enclose$(slavefile$(i)))
    SPLIT t$," ",0,t$,a$
    ' An empty checksum means md5sum produced no output (unreadable
    ' file, vanished file, ...). Two empty checksums must never
    ' count as a match, so such a slave file is skipped entirely.
    IF LEN(t$)
      FOR k=0 TO anzcan-1
        ' A file must not be treated as a duplicate of itself. This
        ' happens when the two trees overlap or when the slave entry
        ' already refers to the master file; deleting it could
        ' destroy the only copy.
        samefile=FALSE
        IF candidate$(k)=slavefile$(i)
          samefile=TRUE
        ELSE IF inode(candidate$(k))=inode(slavefile$(i))
          samefile=TRUE
        ENDIF
        IF NOT samefile
          s$=system$("md5sum "+enclose$(candidate$(k)))
          SPLIT s$," ",0,s$,a$
          IF s$=t$
            lastmatch=k
            INC anzmatch
          ENDIF
        ENDIF
      NEXT k
    ENDIF
    IF anzmatch
      PRINT INT(i/anzslavefiles*100);"% have ";anzmatch;" matches for: ";slavelen(i);" ";slavefile$(i)
      IF verbose
        ' List all same-size candidates; the last matching one is
        ' marked, it is the link target in interactive mode.
        FOR k=0 TO anzcan-1
          IF k=lastmatch
            PRINT "Candidates: ";candidate$(k);" (*)"
          ELSE
            PRINT "Candidates: ";candidate$(k)
          ENDIF
        NEXT k
      ENDIF
      IF interactive
        again:
        PRINT "[Q] Quit , [d] delete , [l] symbolic link, ENTER do nothing"
        INPUT a$
        IF a$="Q"
          QUIT
        ELSE IF a$="d"
          PRINT "DELETE"
          KILL slavefile$(i)
        ELSE IF a$="l"
          PRINT "SYMBOLIC LINK"
          KILL slavefile$(i)
          ' The link target is made absolute so that the link does
          ' not depend on the directory it is placed in.
          target$=candidate$(lastmatch)
          IF LEFT$(target$)<>"/"
            target$=dir$(0)+"/"+target$
          ENDIF
          SYSTEM "ln -s "+enclose$(target$)+" "+enclose$(slavefile$(i))
        ELSE IF LEN(a$)
          PRINT "Unknown command. Again:"
          GOTO again
        ENDIF
      ELSE IF dokill
        PRINT "DELETE"
        KILL slavefile$(i)
      ENDIF
    ENDIF
  ENDIF
  INC i
WEND
QUIT
' ---------------------------------------------------------------
' stepdir(root$): collect the files below root$.
'
' Results are returned in globals:
'   files$()  full path of every file whose name matches mask$
'   filelen() size of the corresponding file
'   anzfiles  number of valid entries in files$()/filelen()
'   dirs$()   work list of directories, dirs$(0) is root$
'   anzdir    number of directories visited
' Reads the globals mask$ (file name pattern) and recursive
' (descend into sub directories only when set).
' Directories are processed breadth-first using dirs$() as a queue.
' ---------------------------------------------------------------
PROCEDURE stepdir(root$)
  LOCAL dirpointer,a$,typ$,name$,maxfiles,maxdirs
  maxfiles=200000
  maxdirs=20000
  DIM files$(maxfiles)
  DIM dirs$(maxdirs)
  DIM filelen(maxfiles)
  anzfiles=0
  anzdir=1
  dirs$(0)=root$ ! This is the starting directory
  dirpointer=0
  WHILE dirpointer<anzdir
    ON ERROR CONT ! Skip any error like permission denied or so....
    a$=FSFIRST$(dirs$(dirpointer),"*")
    WHILE LEN(a$)
      ' Each entry is split at the first blank into a type field
      ' and the name.
      SPLIT a$," ",0,typ$,name$
      IF typ$="d" ! Is it a directory?
        ' Sub directories are queued only in recursive mode (-r).
        IF recursive
          IF name$<>"." AND name$<>".."
            ' Do not write past the end of the directory queue.
            IF anzdir<maxdirs
              dirs$(anzdir)=dirs$(dirpointer)+"/"+name$
              INC anzdir
            ENDIF
          ENDIF
        ENDIF
      ELSE
        IF GLOB(name$,mask$) ! Check if the filename matches the pattern...
          ' Do not write past the end of the file list.
          IF anzfiles<maxfiles
            files$(anzfiles)=dirs$(dirpointer)+"/"+name$
            filelen(anzfiles)=SIZE(files$(anzfiles))
            INC anzfiles
          ENDIF
        ENDIF
      ENDIF
      ON ERROR CONT ! Skip any error like permission denied or so....
      a$=FSNEXT$()
    WEND
    INC dirpointer
  WEND
RETURN
' ---------------------------------------------------------------
' intro: print the program banner.
' Used for --help and --version.
' ---------------------------------------------------------------
PROCEDURE intro
  PRINT "dupdel.bas V.1.00 (c) Markus Hoffmann 2017"
  ' Presumably reports the version of the running interpreter - TODO confirm.
  VERSION
RETURN
' ---------------------------------------------------------------
' using: print the command line help.
' Lists the options handled by the option parser at the top of
' the script, including the long form --recursive and --version.
' ---------------------------------------------------------------
PROCEDURE using
  PRINT "Usage: dupdel.bas [options] "
  PRINT "Options:"
  PRINT " -h, --help Display this information"
  PRINT " --version Display version information"
  PRINT " --interactive, -i run in interactive mode"
  PRINT " --master <path> specify master path"
  PRINT " --slave <path> specify slave path"
  PRINT " -m size minimal file size"
  PRINT " --verbose, -v be more verbose"
  PRINT " --doit, -k run auto-delete mode (careful!)"
  PRINT " --recursive, -r recursive mode"
RETURN
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment