|
| 1 | +# SharePoint Online - Export Duplicate Files |
| 2 | + |
| 3 | +## Summary |
| 4 | + |
| 5 | +This is a simple PowerShell script that loops through all the files in a SharePoint Online tenant and compares their file hashes in order to identify duplicate files. The script does not delete anything; it prints an overview of the duplicates, grouped by hash, for you to review. |
| 6 | + |
| 7 | +**NOTE** the proper way to do this would be using [Microsoft Graph Data Connect](https://learn.microsoft.com/en-us/graph/data-connect-concept-overview), but I'm cheap, and only needed it on my development tenant, so I wrote this script instead, but the concept remains the same. |
| 8 | + |
| 9 | +**NOTE** I ran this using delegated permissions, but if you want a full overview of all files, you should run it using application permissions, as delegated permissions will only return files that the signed-in user has access to. |
| 10 | + |
| 11 | +# [PnP PowerShell](#tab/pnpps) |
| 12 | + |
| 13 | +```powershell |
| 14 | +$ErrorActionPreference = "Stop"  # Treat every error as terminating so the scan halts on the first failure |
| 15 | +$SharePointRootSiteUrl = "https://<tenant>.sharepoint.com/"  # SharePoint Online is served over HTTPS; replace <tenant> with your tenant name |
| 16 | +
|
| 17 | +# Interactive sign-in against the root site; <ClientId> is a placeholder for the client ID of the app registration used to log in. |
| 18 | +Connect-PnPOnline -Interactive -Url $SharePointRootSiteUrl -ClientId "<ClientId>"; |
| 19 | +
|
| 20 | +$allFiles = [System.Collections.Generic.List[object]]::new();  # One record per file found, across every site and drive |
| 21 | +
|
| 22 | +# Enumerate sites through Graph search. The "http*" term presumably matches every site because all site URLs start with http - confirm on your tenant. -All requests every result page. |
| 23 | +$sites = Invoke-PnPGraphMethod -Url "https://graph.microsoft.com/v1.0/sites/?`$search=`"http*`"&`$select=id,webUrl,displayName&`$top=100" -All; |
| 24 | +
|
| 25 | +foreach ($site in $sites.value) { |
| 26 | +    Write-Host "> Site: $($site.displayName) - ($($site.webUrl))" |
| 27 | +    $drives = Invoke-PnPGraphMethod -Url "https://graph.microsoft.com/v1.0/sites/$($site.id)/drives?`$select=id,webUrl,name&`$top=100" -All; |
| 28 | +
|
| 29 | +    foreach ($drive in $drives.value) { |
| 30 | +        Write-Host "`t> Drive: $($drive.name) - ($($drive.webUrl))"; |
| 31 | +
|
| 32 | +        ## Would've loved to use a $select=file,id,webUrl,size,name but that breaks for some reason when using PnP PowerShell |
| 33 | +        $files = Invoke-PnPGraphMethod -Url "https://graph.microsoft.com/v1.0/sites/$($site.id)/drives/$($drive.id)/items?`$filter=file ne null" -All; |
| 34 | +        # Re-check the server-side filter locally: keep only items that carry a `file` property. $null goes on the left so a collection operand cannot turn the comparison into a filter. |
| 35 | +        foreach ($file in $files.value | Where-Object { $null -ne $_.file }) { |
| 36 | +            Write-Host "`t`t>File: $($file.name)"; |
| 37 | +
|
| 38 | +            $allFiles.Add([PSCustomObject]@{ |
| 39 | +                SiteId     = $site.id |
| 40 | +                DriveId    = $drive.id |
| 41 | +                FileId     = $file.id |
| 42 | +                FileName   = $file.name |
| 43 | +                FileWebUrl = $file.webUrl |
| 44 | +                FileSize   = $file.size |
| 45 | +                FileHash   = $file.file.hashes.quickXorHash  # $null when the item exposes no quickXorHash |
| 46 | +            }) |
| 47 | +        } |
| 48 | +        Write-Host "`t> Finished processing files in drive: $($drive.name)" |
| 49 | +    } |
| 50 | +    Write-Host "> Finished processing drives in site: $($site.displayName)" |
| 51 | +} |
| 52 | +
|
| 53 | +Write-Host "Finished loading all files" |
| 54 | +
|
| 55 | +# Files sharing a quickXorHash are reported as duplicates: drop records without a hash, bucket by hash, keep buckets holding more than one file, largest bucket first. |
| 56 | +$hashedFiles = $allFiles | Where-Object { $null -ne $_.FileHash } |
| 57 | +$grouped = $hashedFiles | Group-Object -Property FileHash | Where-Object { $_.Count -gt 1 } | Sort-Object -Property Count -Descending; |
| 58 | +
|
| 59 | +foreach ($duplicateSet in $grouped) { |
| 60 | +    Write-Host "Duplicate files with hash: $($duplicateSet.Name)" |
| 61 | +    $duplicateSet.Group | ForEach-Object { Write-Host "`t> $($_.FileName) - $($_.FileWebUrl)" } |
| 62 | +    Write-Host "" |
| 63 | +} |
| 64 | +
|
| 65 | +
|
| 66 | +``` |
| 67 | + |
| 68 | +*** |
| 69 | + |
| 70 | +## Contributors |
| 71 | + |
| 72 | +| Author(s) | |
| 73 | +| ------------------------------- | |
| 74 | +| [Dan Toft](https://Dan-toft.dk) | |
| 75 | + |
| 76 | + |
| 77 | +[!INCLUDE [DISCLAIMER](../../docfx/includes/DISCLAIMER.md)] |
| 78 | +<img src="https://m365-visitor-stats.azurewebsites.net/script-samples/scripts/template-script-submission" aria-hidden="true" /> |
0 commit comments