//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; if not, write to the Free Software
//  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
//
//  clonekill.c : kills duplicate files on hard disk
//
//
//  norbert heller 1997 - 1998
//
#define INCL_WIN
#define INCL_GPI
#define INCL_PM

#include <os2.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

#include <time.h>
#include "md5.h"


// MD 5 code is much faster..
// #define CRC32_CODE
//

#ifdef CRC32_CODE
  #include "crc32.h"
#endif

// for the settings
#include "clonekill.h"





//
// Process the filename and do something with it
//
//
// ProcessFileName(szFileN)
//
// Checksums one file and records the result.
//
//  - The text after the last '.' of szFileN is lower-cased and looked up
//    in settings.szFiles with strstr() (a substring match). Files whose
//    extension is not found are ignored.
//  - Files without an extension are ignored. A '.' that belongs to a
//    directory component (it precedes the last '\') does not count as an
//    extension.
//  - The whole file is read and hashed (MD5 by default, CRC32 when
//    CRC32_CODE is defined); the checksum is stored as hex text.
//  - Without ADD_DB the name and checksum go into clone[numclon];
//    with ADD_DB a "name;checksum" line is written to dbfile.
//  - numclon is incremented for every file that could be opened.
//
// NOTE(review): clone[] is written without a bounds check because its
// capacity is not visible here -- confirm against its definition.
//
void ProcessFileName(CHAR * szFileN)
{
  FILE *file;

  MD_CTX context;

  int len, i;
  unsigned char buffer[1024], digest[16];

  char *pDot;                  // last '.' in szFileN, NULL if none
  char *pSlash;                // last '\' in szFileN, NULL if none
  char szType[CCHMAXPATH];     // lower-cased extension
  char szCheck[35];            // checksum as hex text (32 digits + NUL for MD5)
  size_t cbType;

  pSlash = strrchr(szFileN, '\\');
  pDot   = strrchr(szFileN, '.');

  // no dot at all, or the dot is part of a directory name: no extension
  if (pDot == NULL || (pSlash != NULL && pDot < pSlash))
    return;

  pDot++;
  cbType = strlen(pDot);
  if (cbType >= sizeof(szType))
    return;

  memcpy(szType, pDot, cbType + 1);
  strlwr(szType);

  // check if we have the extension within the configured file types
  if (strstr(settings.szFiles, szType) == NULL)
    return;

  file = fopen(szFileN, "rb");
  if (file == NULL)
    return;

#ifdef CRC32_CODE
  sprintf(szCheck, "%08lX", calc32crc(file));
#else
  MDInit(&context);

  // feed the complete file through the digest, one buffer at a time
  while ((len = fread(buffer, 1, sizeof(buffer), file)) > 0)
    MDUpdate(&context, buffer, len);

  MDFinal(digest, &context);

  // two hex digits per digest byte; sprintf terminates the string
  for (i = 0; i < 16; i++)
    sprintf(szCheck + 2 * i, "%02x", digest[i]);
#endif

  fclose(file);

#ifndef ADD_DB
  strcpy(clone[numclon].szCheck, szCheck);
  strcpy(clone[numclon].szFilename, szFileN);

  // progress output for every 1000th file
  if ((numclon % 1000) == 0)
    printf("File # %d  %s\n", numclon, clone[numclon].szFilename);
#else
  // progress output for every 100th file
  if ((numclon % 100) == 0)
    printf("Processing # %d %s  -> %s\n", numclon, szFileN, szCheck);

  // the database stores the bare file name (text after the last '\');
  // a name without any '\' is stored unchanged
  if (dbfile != NULL)
    fprintf(dbfile, "%s;%s\n", (pSlash != NULL) ? pSlash + 1 : szFileN, szCheck);
#endif

  numclon++;
}




// ****************************************************************
//
// * Function:  getpath(string, path, filename)                     *
// *                                                                *
// * Purpose:  This function gets the PATH and FILENAME of the file *
// *           target contained in STRING.  The path will end with  *
// *           the '\' char if a path is supplied.                  *
// *                                                                *
// * Used By:  RecursiveFindFile()                                  *
// ****************************************************************

VOID getpath(CHAR *string,CHAR *path,CHAR *filename)
{
  CHAR  *pLastSlash;           // last '\' in string, NULL if there is none
  size_t cbPath = 0;           // length of the path part including the '\'

  // Locate the last '\'. Without one the path part is empty and the
  // whole string is the file name. (The earlier hand-written backward
  // scan read string[-1] in that case.)
  pLastSlash = strrchr(string, '\\');
  if (pLastSlash != NULL)
    cbPath = (size_t)(pLastSlash - string) + 1;

  // Copy filespec up to and including the last '\' to path
  memcpy(path, string, cbPath);
  path[cbPath] = '\0';

  // File name is everything after the last '\'
  strcpy(filename, string + cbPath);
}



//****************************************************************************
// * Function: RecursiveFindFile( FileSpec, options )                           *
// *                                                                            *
// * Purpose:  Finds all files starting with FileSpec, and will look down the   *
// *           directory tree if required.                                      *
// *                                                                            *
// * Params:   FileSpec - ASCIIZ string which designates filespec to search     *
// *                       for.                                                 *
// *                                                                            *
// *           options  - The search/output options.  The following options     *
// *                       may be ORed together when calling this function:     *
// *                                                                            *
// *                    FIRST_TIME  - Indicates this is initial call.  This     *
// *                                   should always be used.                   *
// *                    RECURSE     - Indicates that function should search     *
// *                                   all child subdirectories recursively.    *
// *                    DO_DIRS     - Indicates that directories should be      *
// *                                   included in the search.                  *
// *                    DO_FILES    - Indicates that files should be included   *
// *                                   in the search.                           *
// *                    NAME_ONLY   - Indicates that the output should be       *
// *                                   restricted to filespecs only.            *
// *                    EDITABLE_TIME - Indicates time and date fields should   *
// *                                   be output as one timestamp.              *
// *                                                                            *
// * Used By:  main(), and itself recursively                                   *
// *                                                                            *
// * original code by rexxutils                                                 *
// *                                                                            *
// *                                                                            *
// ****************************************************************************

// Returns VALID_ROUTINE, or INVALID_ROUTINE as soon as a nested call
// reports a non-zero result.
//
// Search strategy:
//  - Split FileSpec into path and file name.
//  - On the initial call with RECURSE set, first run one non-recursive
//    pass over the starting directory, remember the original file name in
//    szFileName, and continue with "<path>*.*" so that sub-directories
//    are seen.
//  - Non-recursive pass: every entry except "." and ".." that passes the
//    DO_DIRS / DO_FILES filter is handed to ProcessFileName().
//  - Recursive pass: for every sub-directory, one non-recursive call with
//    the remembered file name, then one recursive call to expand the
//    sub-directory itself.
//
// Specs that would not fit into CCHMAXPATH bytes are skipped instead of
// being copied into the fixed-size buffers.
// NOTE(review): this assumes szTempFile holds at least CCHMAXPATH bytes --
// confirm against its definition.
LONG RecursiveFindFile(CHAR *FileSpec, ULONG options)
{
  FILEFINDBUF3 finfo;                  // OS2 File Find Utility
                                       // Structure
  ULONG   SearchCount = 1;             // Num things searching for
                                       // per DosFindFirst
  HDIR    DirHandle = 0xFFFF;          // Directory handle used by
                                       // DosFindFirst()
  ULONG   ulFind;                      // result of DosFindFirst/DosFindNext
  CHAR    path[CCHMAXPATH];            // Path to target file(s)
  CHAR    filename[CCHMAXPATH];        // Current file name
  CHAR    szFileSpec[CCHMAXPATH];      // spec handed to DosFindFirst()
  size_t  cbDir;                       // length of "<path><entry>\"

  BOOL    IsaDotdir = FALSE;           // entry is "." or ".."
  BOOL    IsaSubdir = FALSE;           // entry is a directory
  BOOL    fFailed   = FALSE;           // a nested call returned non-zero

  // path, filename and szFileSpec each receive (part of) FileSpec
  if (strlen(FileSpec) >= CCHMAXPATH)
    return VALID_ROUTINE;

  getpath(FileSpec, path, filename);

  // FileSpec may point at the shared szTempFile buffer, which is
  // overwritten further down -- work on a private copy
  strcpy(szFileSpec, FileSpec);

  // Initial recursive case, only executed on the first call
  if ((options & FIRST_TIME) && (options & RECURSE))
  {
    // gather the files of the starting directory itself
    if (RecursiveFindFile(szFileSpec, options & ~RECURSE & ~FIRST_TIME))
      return INVALID_ROUTINE;

    // remember the original file spec for the sub-directory passes
    strcpy(szFileName, filename);

    strcpy(filename, "*.*");

    if (strlen(path) + strlen("*.*") >= CCHMAXPATH)
      return VALID_ROUTINE;

    sprintf(szFileSpec, "%s*.*", path);

    options &= ~FIRST_TIME;
  }

  ulFind = DosFindFirst((PSZ)szFileSpec, (PHDIR)&DirHandle, (ULONG)AllAtts,
                        (PVOID)&finfo, (ULONG)sizeof(finfo),
                        (PULONG)&SearchCount, FIL_STANDARD);

  while (!ulFind && !fFailed)
  {
    IsaSubdir = (BOOL)(finfo.attrFile & FILE_DIRECTORY);
    IsaDotdir = (BOOL)(!strcmp(finfo.achName, ".") || !strcmp(finfo.achName, ".."));

    if (!IsaDotdir)
    {
      if (!(options & RECURSE))
      {
        // Non-recursive case: process every entry that passes the filter
        if ((IsaSubdir && (options & DO_DIRS)) ||
            (!IsaSubdir && (options & DO_FILES)))
        {
          if (strlen(path) + strlen(finfo.achName) < CCHMAXPATH)
          {
            sprintf(szTempFile, "%s%s", path, finfo.achName);

            // Get full path
            DosQueryPathInfo(szTempFile, 5, szTrueName, sizeof(szTrueName));

            // do something to the file ..
            ProcessFileName(szTempFile);
          }
        }
      }
      else if (IsaSubdir)
      {
        // Recursive case: descend into the sub-directory
        cbDir = strlen(path) + strlen(finfo.achName) + 1;

        // files matching the original spec inside the sub-directory
        if (cbDir + strlen(szFileName) < CCHMAXPATH)
        {
          sprintf(szTempFile, "%s%s\\%s", path, finfo.achName, szFileName);

          if (RecursiveFindFile(szTempFile, options & ~RECURSE))
            fFailed = TRUE;
        }

        // expand the sub-directories of the sub-directory
        if (!fFailed && cbDir + strlen(filename) < CCHMAXPATH)
        {
          sprintf(szTempFile, "%s%s\\%s", path, finfo.achName, filename);

          if (RecursiveFindFile(szTempFile, options))
            fFailed = TRUE;
        }
      }
    }

    // Get the next entry
    if (!fFailed)
      ulFind = DosFindNext((HDIR)DirHandle, (PVOID)&finfo,
                           (ULONG)sizeof(finfo), (PULONG)&SearchCount);
  }

  // release the search handle on every path, including the error path
  DosFindClose(DirHandle);

  return fFailed ? INVALID_ROUTINE : VALID_ROUTINE;
}


//
//
//
//
//
// reads settings from settings.dat
//
//
//
int read_settings(void)
{
  FILE *hfile ;

  CHAR szSatz[256] ;
  ULONG ulDummy ;
  LONG lI ;
  CHAR szT[200];

  CHAR szID[100];
  CHAR szValue[CCHMAXPATH];

// ---------------------------------------------------------------------------


  if((hfile = fopen(DATAFILE,"r"))  != NULL)
  {
    // Alle Stze der Datei verarbeiten
    while (fgets(szSatz,sizeof(szSatz),hfile) != NULL)
    {
      lI = strlen(szSatz) ;
      szSatz[lI-1] = '\0' ;


      if((szSatz[0] != IGNORELINE) && !(lI < 0))
      {
      // gettings values

      get_token(szID,   szSatz, DELIMITER, 1, FALSE);
      get_token(szValue,szSatz, DELIMITER, 2, FALSE);

        // now get all values and enter them in the structure
        //

        if (strncmp(szID, DIRECTORIES,strlen(DIRECTORIES)) == 0)
            {
            strcpy(settings.szDirectories, szValue) ;
            strlwr(settings.szDirectories);
            }

        else
        if (strncmp(szID, FILENAMES,strlen(FILENAMES)) == 0)
            {
            strcpy(settings.szFiles, szValue) ;
            strlwr(settings.szFiles);
            }

        // kill only teh following files
        else
        if (strncmp(szID, KILLONLY,strlen(KILLONLY)) == 0)
            {
            strcpy(settings.szKillOnlyDir, szValue) ;
            strlwr(settings.szKillOnlyDir);
            }

        //  recursive ?
        else if (strncmp(szID, RECURSIVE, strlen(RECURSIVE)) == 0)
          {
          if ((strncmp(szValue,"YES",strlen("YES")) == 0) || (strncmp(szValue,"yes",strlen("yes")) == 0))
           settings.bRecursive = TRUE ;
          else
           settings.bRecursive = FALSE ;
          }

        // bUnattended
        else if (strncmp(szID, UNATTENDED, strlen(UNATTENDED)) == 0)
          {
          if ((strncmp(szValue,"YES",strlen("YES")) == 0) || (strncmp(szValue,"yes",strlen("yes")) == 0))
           settings.bUnattended = TRUE ;
          else
           settings.bUnattended = FALSE ;
          }

       } // ignore line ?
      } // while
    fclose(hfile);
    }

    printf("ReadSettings dirs : <%s>\n",settings.szDirectories);

return 0;
}




//
// read in the data base file and store values into array
//
// read_database()
//
// Loads the already open database stream dbfile into the clone[] array.
// Every non-empty line has the form "<filename>;<checksum>"; the two
// tokens are copied with strcpyb() into clone[numclon] and numclon is
// incremented. Every 3000th entry is echoed as progress output.
//
// The stream is closed at the end and dbfile is reset to NULL so that no
// caller closes it a second time. Does nothing when dbfile is NULL.
//
// NOTE(review): clone[] is filled without a bounds check because its
// capacity is not visible here -- confirm against its definition.
//
void read_database(void)
{
  CHAR   szSatz[400] ;   // one line of the database file
  CHAR   szT[400] ;      // current token
  size_t cb ;            // length of the current line

  if (dbfile == NULL)
    return ;

  // All lines
  while (fgets(szSatz,sizeof(szSatz),dbfile) != NULL)
  {
    // strip the line end only if there is one; a final line without a
    // newline must keep the last character of its checksum
    cb = strlen(szSatz) ;
    while (cb > 0 && (szSatz[cb-1] == '\n' || szSatz[cb-1] == '\r'))
      szSatz[--cb] = '\0' ;

    // blank lines carry no entry
    if (cb == 0)
      continue ;

    szT[0] = '\0' ;
    get_token(szT,  szSatz, ";", 1, FALSE);
    strcpyb(clone[numclon].szFilename,  szT);

    szT[0] = '\0' ;
    get_token(szT,  szSatz, ";", 2, FALSE);
    strcpyb(clone[numclon].szCheck,  szT);

    if((numclon % 3000 ) ==0)
        printf ("DB File # %d %s=%s\n", numclon,  clone[numclon].szFilename, clone[numclon].szCheck);

    numclon++;
  } // while

  fclose(dbfile);
  dbfile = NULL ;
}




//
//
//
//
//
// main routine
//
//
//
int main (int argc, char *argv[])
{
  char szPathname[CCHMAXPATH];
  char szBuffer[1000];
  int iDrive ,iC, iPerc;
  ULONG i, j, k, ulNewFiles, ulMaxRuns, ulRuns;

  FILE * f;

  ULONG       options;                              // Mask of options
  options = FIRST_TIME | DO_FILES | DO_DIRS;        // Clear if we should not

  CHAR szTok[20];

  numclon = 0;

#ifndef ADD_DB

  // open kill file and delete old one
  f = fopen("clonekill.cmd", "w");

  // open database file for writing
  dbfile = fopen(DATABASEFILE, "r");

#else

  // open database file for writing
  dbfile = fopen(DATABASEFILE, "a");

#endif


#ifdef CRC32_CODE
     crctabinit();
#endif


    // read in settings file to check values
    rc = read_settings();

    if(settings.bRecursive)
       options = options | RECURSE;

#ifndef ADD_DB
    read_database();

    // only the new files, store value for later
    ulNewFiles = numclon;

    // now check all directories for files
    iC = get_number_of_tokens(settings.szDirectories, ";");

//  printf("dirs : %d <%s>\n",iC, settings.szDirectories);

    for(j = 1; j <= iC ; j ++)
        {
        get_token(szPathname, settings.szDirectories, ";", j, FALSE);

        printf("Searching Directory <%s>\n", szPathname);

        sprintf(szBuffer, "%s\\*.*", szPathname);
        rc = RecursiveFindFile(szBuffer, options);
        }

    fprintf(f, "/*                                              */ \n");
    fprintf(f, "/* Double Files Deleter                         */ \n");
    fprintf(f, "/*                                              */ \n");
    fprintf(f, "/* by norbert heller                            */ \n");
    fprintf(f, "/*                                              */ \n");


    // now check all directories for files
    iC = get_number_of_tokens(settings.szKillOnlyDir, ";");

    ulMaxRuns = numclon * (numclon - ulNewFiles);
    printf("max Runs %d \n",ulMaxRuns);

    // now start sorting out files
    for(i=0; i<=numclon ; i++)

//       for(j=0; j<=numclon ; j++)
         for(j=ulNewFiles; j<=numclon ; j++)
             {
/*
             ulRuns++;

             // only all 5000 values , check if done
             if( (ulRuns % 5000) == 0)
               {
               iPerc = (ulRuns * 100) / ulMaxRuns;
               printf("sort out files : done %d %%\r",iPerc);
               }
*/
             if(i != j && strlen(clone[i].szFilename) > 0)
                 {
                 if(strcmp(clone[i].szCheck, clone[j].szCheck ) == 0)
                     {
                        // ok, we found one

                        // check if we have preferred directories ?
                        if((strlen(settings.szKillOnlyDir) > 0 ) ||  iC > 0 )
                            {
                            for(k=1; k<=iC ; k++)
                                {
                                get_token(szBuffer, settings.szKillOnlyDir, ";", k, FALSE);
                                strcpyb(szTok, szBuffer);

                                pF = strstr(clone[j].szFilename, szBuffer);
                                if (pF != NULL && (strlen(szTok) > 0))
                                    {
//                                  printf("%s %s :: %s = %s\n",szBuffer, settings.szKillOnlyDir, clone[j].szFilename, clone[i].szFilename);
                                    printf("%s = %s\n",clone[j].szFilename, clone[i].szFilename);

                                    if(settings.bUnattended)
                                        {
                                        sprintf(szBuffer, "del %s",clone[j].szFilename);
                                        rc = system(szBuffer);
                                        }
                                        else
                                        {
                                        // write into file
                                        fprintf(f, "\n");
                                        fprintf(f, "/* %s == %s */ \n",clone[j].szFilename, clone[i].szFilename);
                                        fprintf(f, "\"del %s\"\n",clone[j].szFilename);

                                        // erase values so we dont kill a second time the other file
                                        strcpy(clone[j].szFilename,"");
                                        strcpy(clone[j].szCheck,"");

                                        }
                                    }
                                }
                            }

                        // ok, kill the first occurance
                        else
                        {
                                    if(settings.bUnattended)
                                        {
                                        sprintf(szBuffer, "del %s",clone[i].szFilename);
                                        rc = system(szBuffer);
                                        }
                                        else
                                        {
                                        // write into file
                                        fprintf(f, "\n");
                                        fprintf(f, "/* %s == %s */ \n",clone[i].szFilename,clone[j].szFilename);
                                        fprintf(f, "\"del %s\"\n",clone[i].szFilename);

                                        // erase values so we dont kill a second time the other file
                                        strcpy(clone[i].szFilename,"");
                                        strcpy(clone[i].szCheck,"");
                                        }
                        }
                     }
                 }  // j
             } // i

  // close it
  if(f)
    fclose(f);

#else

  if(argc == 2)
    // use this for options e.g  "c:\*.*" ..
    rc = RecursiveFindFile(argv[1], options);
  else
    {
    printf("/*                                              */ \n");
    printf("/* Add to Database                              */ \n");
    printf("/*                                              */ \n");
    printf("/* clonekill_add.exe g:\\*.*                     */ \n");
    printf("/*                                              */ \n");
    printf("/*                                              */ \n");

    }
#endif

  // close Database file
  if(dbfile)
    fclose(dbfile);

  return (0);
}



