diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..25513b5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +minEnv/ +nonCompliant*.txt diff --git a/README.md b/README.md new file mode 100644 index 0000000..6fce100 --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +# Folder and file name checking tool +This Windows tool checks file and folder names against the following criteria: + +## Folders +Folder names must follow one of the two conventions below: + +- Seven-digit ID number [underscore] four-digit date, e.g. `0002268_1865` +- Seven-digit ID number [underscore] four-digit date [underscore] three-letter month [underscore] three-letter month, e.g. `0002268_1865_JAN_JUN` + - Months should always be represented with three letters and matching is not case-sensitive + + +## Files +File names must consist of +- parent folder name [underscore] four-digit number beginning `0001`, followed by the `.tif` extension, e.g. + - 0002091_1856_Jan_Jun/ + - 0002091_1856_Jan_Jun_0001.tif + - 0002091_1856_Jan_Jun_0002.tif + - 0002091_1856_Jan_Jun_0003.tif + - ... + +Matching is case-insensitive by default, but case-sensitive matching can be enabled at run-time. + +## Running the tool +The tool is supplied as an executable but can be run as a python script. The tool requires as an input a directory of directories, each containing files to be scanned. For instance, if the path +`C:\Users\JoeBloggs\Desktop\Files` is given, all directories within this directory, and their contents, will be scanned. Alternatively, the executable can be placed in the directory and run with an option to use the current directory. 
+ +### Running the executable version +- Open the file `nameSchemeCheck.exe` +- Type `Y` and press `Enter` to run in case-sensitive filename checking mode, otherwise press `Enter` +- To scan the current directory, enter `Y` + - The default behaviour is to scan a pre-configured directory +- Enter `N` to scan a directory of your choice + - Unrecognised inputs set the path to the default `\\P12B-NAS1\scandata2\HMD\RAW SCANS\Dave\FMP\Still to deliver to FMP` +- The tool checks if the directory exists and falls back to the default path if this is not the case +- Folder names and file names are scanned according to the conventions defined above +- Any folder or file names not fulfilling these criteria have their full path written to a file + - nonCompliantFolders.txt + - nonCompliantFiles.txt +- These report files appear in the same directory as the executable + +### Running the python script +The python script was developed using python 3.7 and requires additional modules to run. To install these with `pip`, call +``` +pip install -r requirements.txt +``` +It is recommended to perform this step inside a virtual environment. 
+ +To run the python script, call: + +``` +python nameSchemeCheck.py +``` + +- All steps described for the executable version apply to the python version +- Running the script allows the user to modify the default directory, hard-code case-matching options and provides a greater level of control over the program +- To recompile the script as an executable, call +``` +pyinstaller -F nameSchemeCheck.py +``` diff --git a/nameSchemeCheck.exe b/nameSchemeCheck.exe new file mode 100644 index 0000000..a2453f6 Binary files /dev/null and b/nameSchemeCheck.exe differ diff --git a/nameSchemeCheck.py b/nameSchemeCheck.py new file mode 100644 index 0000000..2541f7b --- /dev/null +++ b/nameSchemeCheck.py @@ -0,0 +1,102 @@ +import os, re +from tqdm import tqdm +__name__ = "__main__" + +def scanFolders(inputDirectory): + foldersFound=[] + for folder in os.listdir(inputDirectory): + if os.path.isdir(os.path.join(inputDirectory,folder)): + foldersFound.append(folder) + + # checks if you're pointing it to a folder with subfolders in: + fullPaths=[] + print("Checking for subfolders in top level directory:\n",flush=True) + for folder in tqdm(foldersFound): + fullpath=inputDirectory+'\\'+folder + if os.path.isdir(fullpath): + fullPaths.append(fullpath) + + if fullPaths: + # compile regexs outside of loops + folderPatternOne=re.compile("(^\d{7}_\d{4}$)",re.MULTILINE) + folderPatternTwo=re.compile("(^\d{7}_\d{4}_[a-zA-Z]{3}_[a-zA-Z]{3}$)",re.MULTILINE) + + print("\nChecking folder name compliance:\n",flush=True) + with open ('nonCompliantFolders.txt','a+') as badFolders: + for folder in tqdm(foldersFound): + matchOne=folderPatternOne.match(folder) + matchTwo=folderPatternTwo.match(folder) + if matchOne is None: + if matchTwo is None: + badFolders.write(folder+'\n') + + return fullPaths + else: + print("No subfolders found within this directory!",flush=True) + return 0 + +def scanFiles(pedantic,fullPaths): + print("\nChecking filename compliance:\n",flush=True) + with 
open('nonCompliantFiles.txt','a+') as badFileNames: + for directoryPath in tqdm(fullPaths): + filenames=os.listdir(directoryPath) + filePattern=(os.path.basename(directoryPath))+r'(_\d{4}.tif)' + if pedantic is True: + regexPattern=re.compile(filePattern) + else: + regexPattern=re.compile(filePattern,re.IGNORECASE) + for tifFile in filenames: + match=regexPattern.match(tifFile) + if match is None: + outstring=directoryPath+'\\'+tifFile + badFileNames.write(outstring+'\n') + +def setup(): + pedantic=False + mode=input("Type Y to run in case-sensitive filename checking mode, or enter to continue in case-insensitive mode:\n") + if mode is "Y": + pedantic=True + else: + pedantic=False + if pedantic is True: + print("Running in case-sensitive mode\n") + else: + print("Running in case-insensitive mode\n") + + + inputDirectory=r"\\P12B-NAS1\scandata2\HMD\RAW SCANS\Dave\FMP\Still to deliver to FMP" + inDir=input("Scan current folder? Y/N:\n") + if (inDir=="Y" or inDir=="y" or inDir=="Yes" or inDir=="yes"): + inputDirectoryUser=os.getcwd() + elif (inDir=="N" or inDir=="n" or inDir=="No" or inDir=="no"): + inputDirectoryUser=input("Enter full path of folder to scan:\n") + if inputDirectoryUser=="": + print("No path entered, using default filepath: {}\n".format(inputDirectory),flush=True) + inputDirectoryUser=inputDirectory + else: + inputDirectoryUser=inputDirectory + print("Input not recognised, using default filepath: {}\n".format(inputDirectory)) + print("Checking if directory exists...\n",flush=True) + + + + if os.path.isdir(inputDirectoryUser): + print("Input directory: {}\ exists!\n".format(inputDirectoryUser)) + inputDirectory = inputDirectoryUser + else: + print("File path not a valid directory, using default path {}\n".format(inputDirectory)) + + return pedantic,inputDirectory + +def main(): + + isPedantic,inputDir=setup() + filePaths=scanFolders(inputDir) + scanFiles(isPedantic,filePaths) + + + input("Complete! 
Press enter to exit") + + +if __name__=="__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d46013c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +altgraph==0.16.1 +future==0.17.1 +pefile==2019.4.18 +PyInstaller==3.5 +pywin32==224 +pywin32-ctypes==0.2.0 +tqdm==4.35.0