开发者

Seeing duplicates in my output. Why?

The output of my app is producing duplicates of filenames...and i'm not 100% sure why that is.

My app "cleans" the file name by finding the regex pattern in the filename. If there is none, it dumps it into a "normal" list and ignores it.

Here is the code i'm using to display my output: [this keeps showing me duplicates of filenames!!]

public partial class DriveRecursion_Results : Form
{
    List<string> paths = new List<string>();


    public DriveRecursion_Results()
    {
        InitializeComponent();
    }




    public void DriveRecursion(string retPath)
    {
        string pattern = (@"[~#&!%+{}]+");

        Regex regEx = new Regex(pattern);

        string[] fileDrive = Directory.GetFiles(retPath, "*.*", SearchOption.AllDirectories);

        List<string> normal = new List<string>();
        List<string> fileNameOnlyList = new List<string>();


        dataGridView1.Rows.Clear();
        try
        {
            foreach (string fileNames in fileDrive)
            {
                string strippedFileName = System.IO.Path.GetFileName(fileNames);
                fileNameOnlyList.Add(strippedFileName);

                foreach (string nameOnly in fileNameOnlyList)
                {
                    if (regEx.IsMatch(strippedFileName))
                    {
                        //string fileNameOnly = Path.GetFileName(fileNames);
                        string pathOnly = Path.GetDirectoryName(fileNames);

                        DataGridViewRow dgr = new DataGridViewRow();

                        dgr.CreateCells(dataGridView1);
                        dgr.Cells[0].Value = pathOnly;
                        dgr.Cells[1].Value = nameOnly;
                        dataGridView1.Rows.Add(dgr);
                        string pathforInvalidName = System.IO.Path.Combine(System.IO.Path.GetDirectoryName(nameOnly), pathOnly);


                        paths.Add(pathforInvalidName);


                    }

                    else
                    {
                        normal.Add(strippedFileName);

                    }
                }

            }

        }
        catch (Exception e)
        {
            StreamWriter sw = new StreamWriter(retPath + "ErrorLog.txt");
            sw.Write(e);

        }


    }


    private void button1_Click_1(object sender, EventArgs e)
    {

        this.Close();

        CleanNames clean = new CleanNames();
        clean.Sanitizer(paths);
        clean.Show();



    }

Once it's done identifying which files need to be renamed, it cleans up the "dirty" names:

public partial class CleanNames : Form
{
    public CleanNames()
    {
        InitializeComponent();

    }

    public void Sanitizer(List<string> paths)
    {
        string regPattern = (@"[~#&!%+{}]+");
        string replacement = " ";

        Regex regExPattern = new Regex(regPattern);
        Regex regExPattern2 = new Regex(@"\s{2,}");

        StreamWriter errors = new StreamWriter(@"S:\Test\Errors.txt", true);
        var filesCount = new Dictionary<string, int>();


        dataGridView1.Rows.Clear();

           try
            {

              foreach (string files2 in paths)
              {

                string filenameOnly = System.IO.Path.GetFileName(files2);
                string pathOnly = System.IO.Path.GetDirectoryName(files2);
               开发者_Go百科 string sanitizedFileName = regExPattern.Replace(filenameOnly, replacement);
                sanitizedFileName = regExPattern2.Replace(sanitizedFileName, replacement);
                string sanitized = System.IO.Path.Combine(pathOnly, sanitizedFileName);


                if (!System.IO.File.Exists(sanitizedFileName))
                {
                    DataGridViewRow clean = new DataGridViewRow();
                    clean.CreateCells(dataGridView1);
                    clean.Cells[0].Value = pathOnly;
                    clean.Cells[1].Value = filenameOnly;
                    clean.Cells[2].Value = sanitizedFileName;
                    dataGridView1.Rows.Add(clean);

                    System.IO.File.Move(files2, sanitized);
                }

                else
                {
                    if (filesCount.ContainsKey(sanitizedFileName))
                    {
                        filesCount[sanitized]++;
                    }
                    else
                    {
                        filesCount.Add(sanitized, 1);
                    }
                    string newFileName = String.Format("{0}{1}{2}",
                    System.IO.Path.GetFileNameWithoutExtension(sanitized),
                    filesCount[sanitized].ToString(),
                    System.IO.Path.GetExtension(sanitized));
                    string newFilePath = System.IO.Path.Combine(System.IO.Path.GetDirectoryName(sanitized), newFileName);
                    newFileName = regExPattern2.Replace(newFileName, replacement);
                    System.IO.File.Move(files2, newFilePath);
                    sanitized = newFileName;

                    DataGridViewRow clean = new DataGridViewRow();
                    clean.CreateCells(dataGridView1);
                    clean.Cells[0].Value = pathOnly;
                    clean.Cells[1].Value = filenameOnly;
                    clean.Cells[2].Value = newFileName;

                    dataGridView1.Rows.Add(clean);

                }




              }
            }
           catch (Exception e)
           {
               errors.Write(e);
           }


    }

    private void SanitizeFileNames_Load(object sender, EventArgs e)
    { }

    private void dataGridView1_CellContentClick(object sender, DataGridViewCellEventArgs e)
    {

    }

    private void button1_Click(object sender, EventArgs e)
    {
        Application.Exit();
    }

What i'm trying to do here is ONLY show files that need to be renamed (not all files). I want to take those dirty filenames and clean them with my 2nd class.

Anybody know why i'm seeing multiples of the same file on the output? Anybody know how to fix this?!?!


My immediate observation is that your foreach (string nameOnly in fileNameOnlyList) loop should not be nested where it is. Your logic looks like this.

For each filename:
    Add it to the list.
    For *everything in the list*...

So you'll add one. Then process it. Then add another. Then process both. Then add. Then process all three. Etcetera.

Try this.

        foreach (string fileNames in fileDrive)
        {
            string strippedFileName = System.IO.Path.GetFileName(fileNames);
            fileNameOnlyList.Add(strippedFileName);
        }

        foreach (string strippedFileName in fileNameOnlyList)
        {
            if (regEx.IsMatch(strippedFileName))
            // ...
        }

Edit

Even better, why have two loops?

        foreach (string fileNames in fileDrive)
        {
            string strippedFileName = System.IO.Path.GetFileName(fileNames);
            fileNameOnlyList.Add(strippedFileName);

            if (regEx.IsMatch(strippedFileName))
            // ...
        }


My first guess is that you are seeing duplicates because you have the loop over fileNameOnlyList inside the loop over fileDrive. This when you are processing second file name from fileDrive collection, you will add the first one to your data grid as well.

There are two possible ways to fix it: - move the inner loop out of the outer loop and put it just under it - remove the inner loop (but leave the code that is inside it) and use strippedFileName instead of nameOnly variable in the code


You search recursively through your directory structure

Directory.GetFiles(retPath, "*.*", SearchOption.AllDirectories);

but you use only the filename

System.IO.Path.GetFileName(fileNames);

So if you have the same file in nested folders, it will show up twice.

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜