diff --git a/scripts/remove-duplicates.js b/scripts/remove-duplicates.js index 02586f11..2d30fd4e 100644 --- a/scripts/remove-duplicates.js +++ b/scripts/remove-duplicates.js @@ -3,33 +3,44 @@ const path = require("node:path"); (async () => { try { + // Define the base directory containing .txt files const directoryPath = path.join(__dirname, ".."); + + // Retrieve all .txt files in the directory const files = (await fs.readdir(directoryPath)).filter((file) => - file.endsWith(".txt"), + file.endsWith(".txt") ); + // Process each file concurrently await Promise.all( files.map(async (file) => { const filePath = path.join(directoryPath, file); const fileContents = await fs.readFile(filePath, "utf8"); - const lines = fileContents.split("\n"); + // Initialize a Set to keep track of unique domains const existingDomains = new Set(); - const filteredLines = lines.filter((line) => { - if (line.startsWith("0.0.0.0 ")) { - const domain = line.replace("0.0.0.0 ", ""); - if (!existingDomains.has(domain)) { - existingDomains.add(domain); - return true; + const filteredLines = fileContents + .split("\n") + .filter((line) => { + // Filter duplicate "0.0.0.0" entries + if (line.startsWith("0.0.0.0 ")) { + const domain = line.slice(8); // Extract domain after "0.0.0.0 " + if (existingDomains.has(domain)) { + return false; // Exclude duplicate + } + existingDomains.add(domain); // Add unique domain to Set } - return false; - } - return true; - }); + return true; // Include non-duplicate or non-"0.0.0.0" lines + }); + // Write the filtered content back to the file await fs.writeFile(filePath, filteredLines.join("\n"), "utf8"); - }), + + console.log(`Processed and removed duplicates in: ${file}`); + }) ); + + console.log("All files processed successfully."); } catch (error) { console.error("Error processing files:", error); }