去除字符串中的重複字符——方法匯總和性能測試


Design an algorithm and write code to remove the duplicate characters in a string without using any additional buffer. NOTE: One or two additional variables are fine. An extra copy of the array is not.
Write the test cases for this method.
去除掉一個字符數組中的重複字符,不能使用附加的緩存空間(允許使用一兩個額外變量,但不允許複製一份數組),並為該方法設計測試用例。

本文中的C++算法來自於HawStein的博客。

題目中已經要求不可以有額外的buffer,所以得在原數組的基礎上操作。
此外,假設所有字符char的取值範圍只有[0,255](共256種取值)。
算法思想:用i遍歷整個字符串,如果s[i]不是'\0'(即它尚未被標記為重複),就把它寫入到s[p]的位置上(p必然小於等於i),然後向後遍歷,把後面所有與它相同的字符全部標為'\0';遍歷結束後在s[p]處寫入結束符'\0'。

#include<cstring>
/*
 * Remove duplicate characters from the NUL-terminated string `s` in place,
 * keeping the first occurrence of each character in its original order.
 *
 * No auxiliary buffer is used: every later copy of a kept character is
 * overwritten with '\0', and such blanked slots are skipped when the read
 * cursor reaches them. The nested scan makes this quadratic in the length.
 */
void RemoveDuplicate(char s[]){
    int total = strlen(s);
    int write = 0;
    int read = 0;

    while (read < total) {
        char current = s[read];

        /* A '\0' inside the original length marks a duplicate blanked earlier. */
        if (current == '\0') {
            read++;
            continue;
        }

        /* Compact the survivor towards the front (write <= read always holds). */
        s[write] = current;
        write++;

        /* Blank every later occurrence so it is never kept again. */
        for (int scan = read + 1; scan < total; scan++) {
            if (s[scan] == current) {
                s[scan] = '\0';
            }
        }
        read++;
    }

    s[write] = '\0';
}

這個方法的時間複雜度為O(n^2)。

如果可以開一個固定大小的數組(即大小與n無關),那麼時間複雜度可以降到O(n)。
算法思想:開設一個大小為256的bool數組,記錄每個字符是否已經出現過。

/*
 * Remove duplicate characters from the NUL-terminated string `s` in place,
 * keeping the first occurrence of each character in its original order.
 *
 * A fixed 256-entry table records which byte values have been seen, so the
 * string is processed in a single pass.
 */
void RemoveDuplicate1(char s[]){
    bool seen[256] = { false };
    int write = 0;

    /* write <= read at all times and only non-NUL bytes are written, so the
       terminator being scanned for is never overwritten before it is reached. */
    for (int read = 0; s[read] != '\0'; read++) {
        /* Plain char may be signed: go through unsigned char so bytes
           >= 0x80 cannot produce a negative (out-of-bounds) table index. */
        unsigned char key = (unsigned char)s[read];

        if (!seen[key]) {
            seen[key] = true;
            s[write++] = s[read];
        }
    }

    s[write] = '\0';
}

性能測試

基礎測試用例

#include<stdio.h>
#include<stdlib.h>
#include<cstring>
void RemoveDuplicate(char s[]);
void RemoveDuplicate1(char s[]);
void RemoveDuplicate2(char s[]);
/*
 * Basic demo: runs each de-duplication routine on two small fixtures and
 * prints the resulting strings, one per line, under a banner naming the
 * routine.
 */
int main(){
    /* Fixtures: mixed repeats, one repeated letter, the empty string,
       and a string in which every letter is doubled. */
    char s1[] = "starcraft";
    char s2[] = "llllllll";
    char s3[] = "";
    char s4[] = "ssttaarrccrraafftt";

    char *in_place_cases[] = { s1, s2 };
    char *table_cases[] = { s3, s4 };

    printf("====RemoveDuplicate====\n");
    for (int k = 0; k < 2; k++) {
        RemoveDuplicate(in_place_cases[k]);
        printf("%s\n", in_place_cases[k]);
    }

    printf("====RemoveDuplicate1====\n");
    for (int k = 0; k < 2; k++) {
        RemoveDuplicate1(table_cases[k]);
        printf("%s\n", table_cases[k]);
    }

    /* "pause" is a Windows shell command; it keeps the console window open. */
    system("pause");
    return 0;
}

====RemoveDuplicate====
starcf
l
====RemoveDuplicate1====

starcf
請按任意鍵繼續. . .

性能比較
為了獲得足夠大的字符數組,我修改了一下之前的測試用例生成函數,使其可以生成任意大小的字符數組。

#include <time.h>
#include <stdio.h>
#include <random>
#include <stdlib.h>
#include <math.h>
/* Pseudo-random integer in [0, x). The argument is parenthesized so that an
   expression such as random(n - 2) expands to rand() % (n - 2). */
#define random(x) (rand()%(x))

/* Alphabet the random strings are drawn from: the 52 ASCII letters
   (plus the implicit terminating NUL of the literal). */
char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

/*
 * Generate `size` elements of random test data and also save them to the
 * file `file_name` (created or truncated).
 *
 *   type 'c'  - a char array holding size-1 random letters from `charset`
 *               followed by a terminating '\0'; `maxnum` is ignored.
 *   type 'd'  - an int array of `size` values produced by random(maxnum).
 *   otherwise - a zero-filled int array; nothing is written to the file.
 *
 * Returns a heap buffer that the caller must release with free(), or NULL
 * when an argument is unusable (NULL file name, size <= 0, maxnum == 0 for
 * type 'd'), when allocation fails, or when the file cannot be opened or
 * closed.
 *
 * NOTE: the generator is reseeded from the clock on every call, so two calls
 * within the same second produce the same sequence. Integers are written
 * back to back without a separator, exactly as before.
 */
void* get_test_data (const char* file_name, int size,char type,int maxnum){
    if (file_name == NULL || size <= 0)
        return NULL;
    if (type == 'd' && maxnum == 0)
        return NULL;                    /* rand() % 0 is undefined */

    /* calloc zero-fills, so no path can hand back uninitialised memory. */
    void* data;
    if (type == 'c')
        data = calloc((size_t)size, sizeof(char));
    else
        data = calloc((size_t)size, sizeof(int));
    if (data == NULL)
        return NULL;

    FILE* output_file = NULL;
#ifdef _MSC_VER
    /* MSVC flags plain fopen as deprecated; keep the checked variant there. */
    if (fopen_s(&output_file, file_name, "w+") != 0)
        output_file = NULL;
#else
    output_file = fopen(file_name, "w+");
#endif
    if (output_file == NULL) {
        free(data);
        return NULL;
    }

    srand((unsigned)time(NULL));

    /* Number of selectable letters; excludes the literal's terminating NUL. */
    const int letters = (int)(sizeof(charset) - 1);

    for (int i = 0; i < size; i++) {
        if (type == 'c') {
            char* chars = (char*)data;
            /* The last slot is reserved for the string terminator. */
            chars[i] = (i == size - 1) ? '\0' : charset[random(letters)];
            fprintf(output_file, "%c", chars[i]);
        }
        else if (type == 'd') {
            int* ints = (int*)data;
            ints[i] = random(maxnum);
            fprintf(output_file, "%d", ints[i]);
        }
    }

    /* fclose flushes buffered output; a failure means the file is incomplete. */
    if (fclose(output_file) != 0) {
        free(data);
        return NULL;
    }

    return data;
}

測試用例

#include<stdio.h>
#include<stdlib.h>
#include<cstring>
#define SIZE 1000
void RemoveDuplicate(char s[]);
void RemoveDuplicate1(char s[]);
void RemoveDuplicate2(char s[]);
void* get_test_data(const char* file_name, int size,char type,int maxnum);

/*
 * Large-input demo: generates one random SIZE-byte string, copies it, runs
 * RemoveDuplicate on the original and RemoveDuplicate1 on the copy, and
 * prints both inputs followed by both results so they can be compared.
 */
int main(){
    /* A disabled RemoveDuplicate2 demo used to sit here; it referred to
       s1..s4, which are not declared in this function, so it was dropped. */

    char *bc=(char*)get_test_data("test.txt",SIZE,'c',0);
    if (bc == NULL) {
        /* Generation can fail (e.g. test.txt cannot be created); printing a
           NULL pointer with %s would be undefined behaviour. */
        fprintf(stderr, "failed to generate test data\n");
        return EXIT_FAILURE;
    }

    /* Both buffers are exactly SIZE bytes and the generated string ends with
       '\0' in its last byte, so a plain memcpy carries the terminator too. */
    char  bc_copy[SIZE];
    memcpy(bc_copy, bc, SIZE);

    printf("%s\n=========\n", bc);
    printf("%s\n=========\n", bc_copy);
    RemoveDuplicate(bc); printf("%s\n", bc);
    RemoveDuplicate1(bc_copy); printf("%s\n", bc_copy);

    free(bc);

    /* "pause" is a Windows shell command; it keeps the console window open. */
    system("pause");
    return 0;
}

結果

slbkoXzycbXuWseuyrwyCjVusUcTggHHFdkrBMYIoHNTKWMEBajIBATVQSPDNclUEfgobDylLRVzVuVR
cajBbDKSGuLpbiGQEccsQfJNsUFGncOeMNujfDxUxijXITEnMOSHrXcxvzInvmGNUVNIvFLBomNxcDTq
ovkKsIQhivebpLMYSPFqLqTVkVhMWmorQabHwiMfAkqScUgUfVGmQWTRjVLnAaHiOqzCEbgtizvHoIkj
CNBnYKhbcVmyqOrKvCLORwhMnOskqVTCUzfaUzlJBnLpnKDlwoqxTthnzPSgcLpJWYstpAtRpDLLbFck
PJsJHodnUWWABoYyEYVjiXXziRCJbEUcpwQRFKWEFDrjbvUwVoDStIbIcLUwIRAyMrpRuywOjMABUGtY
csemrzEHPhTclDMoeaVOIzIMWycsvsArRPDoIXAwoblPSttHOfdflOKlQilTnGKMyySsXAcEObKGRTfg
CvMyLHwIVAVUtJdJfHTUApBHsMrLkOgNmXuOviCgTQdQGTVfopaPgxJjbqkSzghUeUOvTgMpJsxOuJHr
sUFNHzjLdgXRVNybtIaurnNMlfILQsERqycXbxuuWbIHWrgAODVsRgGVcBboVyesbuBrKyAcRKSsMFmj
ajgAnwjSzvBUbubjRwOsXcLEiJjMBUVHmlqQhVpCKhNKwtLFsmBcvNqnuGoUibEYVFncFqgSeXniHlxH
yBgJmueKrfSrbnNnqxhmcrfOCNbbgnXsuqthGQuIilFWAdsUgSecEfMtbzFSfwjOylxVTkqdVWclsEmb
GpGJSJaXuNSxOlbnUXjXRTdCvImdvJcvIzVFtsARccwpbldcyuIcdlrMXDccXSkCLfRQNDrDAIcqoqvq
dbaQGhfLQbEbyhpogAhRzCfpiLABUkBqLBGIokjcvYBtLWlQOiakscoamTYUIXGXaOzRnwGSEvDxUIxY
zuDJdLDwdVXWMrKzgXOEpOBQJQlTVepnrSPBFlk
=========
slbkoXzycbXuWseuyrwyCjVusUcTggHHFdkrBMYIoHNTKWMEBajIBATVQSPDNclUEfgobDylLRVzVuVR
cajBbDKSGuLpbiGQEccsQfJNsUFGncOeMNujfDxUxijXITEnMOSHrXcxvzInvmGNUVNIvFLBomNxcDTq
ovkKsIQhivebpLMYSPFqLqTVkVhMWmorQabHwiMfAkqScUgUfVGmQWTRjVLnAaHiOqzCEbgtizvHoIkj
CNBnYKhbcVmyqOrKvCLORwhMnOskqVTCUzfaUzlJBnLpnKDlwoqxTthnzPSgcLpJWYstpAtRpDLLbFck
PJsJHodnUWWABoYyEYVjiXXziRCJbEUcpwQRFKWEFDrjbvUwVoDStIbIcLUwIRAyMrpRuywOjMABUGtY
csemrzEHPhTclDMoeaVOIzIMWycsvsArRPDoIXAwoblPSttHOfdflOKlQilTnGKMyySsXAcEObKGRTfg
CvMyLHwIVAVUtJdJfHTUApBHsMrLkOgNmXuOviCgTQdQGTVfopaPgxJjbqkSzghUeUOvTgMpJsxOuJHr
sUFNHzjLdgXRVNybtIaurnNMlfILQsERqycXbxuuWbIHWrgAODVsRgGVcBboVyesbuBrKyAcRKSsMFmj
ajgAnwjSzvBUbubjRwOsXcLEiJjMBUVHmlqQhVpCKhNKwtLFsmBcvNqnuGoUibEYVFncFqgSeXniHlxH
yBgJmueKrfSrbnNnqxhmcrfOCNbbgnXsuqthGQuIilFWAdsUgSecEfMtbzFSfwjOylxVTkqdVWclsEmb
GpGJSJaXuNSxOlbnUXjXRTdCvImdvJcvIzVFtsARccwpbldcyuIcdlrMXDccXSkCLfRQNDrDAIcqoqvq
dbaQGhfLQbEbyhpogAhRzCfpiLABUkBqLBGIokjcvYBtLWlQOiakscoamTYUIXGXaOzRnwGSEvDxUIxY
zuDJdLDwdVXWMrKzgXOEpOBQJQlTVepnrSPBFlk
=========
slbkoXzycuWerwCjVUTgHFdBMYINKEaAQSPDfLRGpiJnOxvmqht
slbkoXzycuWerwCjVUTgHFdBMYINKEaAQSPDfLRGpiJnOxvmqht
請按任意鍵繼續. . .

版權聲明:本文為博主原創文章,轉載請標明出處。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM