......

BITMAPINFOHEADER bi;

bi.biSize = sizeof(BITMAPINFOHEADER);
bi.biWidth = bmpScreen.bmWidth;
bi.biHeight = bmpScreen.bmHeight;
bi.biPlanes = 1;
bi.biBitCount = bmpScreen.bmBitsPixel; 
bi.biCompression = BI_RGB;
bi.biSizeImage = 0;
bi.biXPelsPerMeter = 0;
bi.biYPelsPerMeter = 0; 
bi.biClrUsed = 0; 
bi.biClrImportant = 0; 

DWORD dwBmpSize = ((bmpScreen.bmWidth * bi.biBitCount + 31) / 32) * 4 * bmpScreen.bmHeight;  cBmpData = new unsigned char[dwBmpSize ]; 

GetDIBits(hdcScreen, hbmScreen, 0, (UINT)bmpScreen.bmHeight, cBmpData, (BITMAPINFO *)&bi, DIB_RGB_COLORS); 

DeleteObject(bmpScreen); 

ReleaseDC(hdcScreen); 

return cBmpData; 
} <---运行到这里时提示堆栈损坏

磁盘文件排序
标题讲述,来自《编程珠玑》:
输入:一个最多带有n个不雷同的正整数的公文,其中每一个数都自愧不如等于n,且n=10^7。
输出:拿到按从小到大升序排列的带有全部输入的平头的列表。
规则:最多有大体1MB的内存空间可用,但磁盘空间充裕。且须要运转时刻在6分钟以下,10秒为一级结果。

......

BITMAPINFOHEADER bi;

bi.biSize = sizeof(BITMAPINFOHEADER);
bi.biWidth = bmpScreen.bmWidth;
bi.biHeight = bmpScreen.bmHeight;
bi.biPlanes = 1;
bi.biBitCount = bmpScreen.bmBitsPixel; 
bi.biCompression = BI_RGB;
bi.biSizeImage = 0;
bi.biXPelsPerMeter = 0;
bi.biYPelsPerMeter = 0; 
bi.biClrUsed = 0; 
bi.biClrImportant = 0; 

DWORD dwBmpSize = ((bmpScreen.bmWidth * bi.biBitCount + 31) / 32) * 4 * bmpScreen.bmHeight;  cBmpData = new unsigned char[dwBmpSize ]; 

GetDIBits(hdcScreen, hbmScreen, 0, (UINT)bmpScreen.bmHeight, cBmpData, (BITMAPINFO *)&bi, DIB_RGB_COLORS); 

DeleteObject(bmpScreen); 

ReleaseDC(hdcScreen); 

return cBmpData; 
} <---运行到这里时提示堆栈损坏

1、引言

  位图是运用位(bit)数组来对数据进行总结,排序和去重,其结构图如下:

    澳门金沙国际 1

  其中位图的索引映射须要仓储的值,位图索引所在地点的值表示索引对应的值是不是早已储存。

那是因为其实GetDIBits的第五个参数需要的其实是一个BITMAPINFO结构,而我们传入的是BITMAPINFOHEADER。

分析:

这是因为实际GetDIBits的第五个参数需要的其实是一个BITMAPINFO结构,而我们传入的是BITMAPINFOHEADER。

2、接口

 1 public interface ByteMap {
 2 
 3     /* get the value in the index of byte map
 4      * @param index the index in byte map to get 
 5      * @return 1 or 0 base the value in the index of byte map
 6      */
 7     int getByte(int index);
 8     
 9     /* set the value in the index of byte map
10      * @param index the index in byte map to set
11      * @param flag the value setted in the index of byte map
12      */
13     void setByte(int index, int flag);
14     
15     /* set the value in byte map from start to end
16      * @param start the start index in byte map
17      * @param end the end index in byte map
18      * @flag the value setted in byte map from start to end
19      */
20     void setByte(int start, int end, int flag);
21     
22     /*
23      * get the size of byte map
24      */
25     int getSize();
26     
27     /*
28      * set the size of byte map
29      * @param size to be setted
30      */
31     void setSize(int size);
32     
33     /*
34      * count how many 1 in byte map
35      */
36     int countOne();
37     
38     /*
39      * count how many i in byte map from start to end
40      */
41     int countOne(int start, int end);
42     
43     /*
44      * get the sub map of byte map from start to end
45      */
46     ByteMap subMap(int start, int end);
47     
48     /*
49      * flip the value in byte map 
50      */
51     ByteMap not();
52     
53     /*
54      * or operation with another byte map
55      */
56     ByteMap or(ByteMap byteMap);
57     
58     /*
59      * xor operation with another byte map
60      */
61     ByteMap xor(ByteMap byteMap);
62     
63     /*
64      * and operation with another byte map
65      */
66     ByteMap and(ByteMap byteMap);
67     
68     /*
69      * byte map left shift operation
70      */
71     ByteMap leftShift(int shiftStep);
72     
73     /*
74      * byte map right shift operation
75      */
76     ByteMap rightShift(int shiftStep);
77 }

设若在位图不低于十六人时,那是行得通的。不过在位图小于拾伍人时,它还索要此外的内存空间来存储1个调色板数据,所以就会发生堆栈损坏的错误。

① 、归并排序。你或者会想到把磁盘文件举办归并排序,但难点须求中,你唯有1MB的内存空间可用,所以,归并排序那一个法子十二分。

要是在位图不低于十六人时,那是卓有成效的。但是在位图小于拾肆位时,它还索要此外的内存空间来存储3个调色板数据,所以就会发生堆栈损坏的失实。

 3、实现

  定义静态byte数组常量,用于火速查看位图上索引对应的值:

 1 private static final byte[] BYTE_VALUE = {
 2         0x0001,
 3         0x0002,
 4         0x0004,
 5         0x0008,
 6         0x0010,
 7         0x0020,
 8         0x0040,
 9         -0x0080
10 };

宣称字段:

1 private int size;
2 private byte b;
3 private byte[] biteMap;

其间size为位图的尺寸;当位图的size小于等于8时,使用b,否则使用biteMap

构造函数:

 1 public ByteMapImpl() {
 2     this(8);
 3 }
 4     
 5 public ByteMapImpl(int size) {
 6     if(size <= 0) {
 7         throw new IllegalArgumentException("ByteMap size value should be positive");
 8     }
 9     this.size = size;
10     if(size <= 8) {
11         this.b = 0;
12     }else {
13         int len = (size >> 3) + ((size & 7) > 0 ? 1 : 0);
14         this.biteMap = new byte[len];
15     }
16 }

磁盘文件排序,指示堆栈损坏的消除办法。 其中位图的目录从0开头的

位图最主要的八个接口是:

 1 public int getByte(int index) {
 2     if(index < 0 || index >= this.size) {
 3         throw new IllegalArgumentException("index out of bit map");
 4     }
 5     byte unit = (this.size <= 8) ? this.b : this.biteMap[index >> 3];
 6     int result = 0;
 7     result = unit & BYTE_VALUE[index & 7];
 8     return result == 0 ? 0 : 1;
 9 }
10     
11 public void setByte(int index, int flag) {
12     if(index < 0 || index >= this.size) {
13         throw new IllegalArgumentException("index out of bit map");
14     }
15     if(flag != 0 && flag != 1) {
16         throw new IllegalArgumentException("illegal flag argument, must be 1 or 0");
17     }
18         
19     if(flag == this.getByte(index)) {
20         return;
21     }
22     int offSet = index & 7;
23     if(this.size <= 8) {
24         if(flag == 1) {
25             this.b = (byte) (this.b | BYTE_VALUE[offSet]);
26         }else {
27             this.b = (byte) (this.b & (~BYTE_VALUE[offSet]));
28         }
29             
30     }else {
31         int unitIndex = index >> 3;
32         byte unit = this.biteMap[unitIndex];
33         if(flag == 1) {
34             this.biteMap[unitIndex] = (byte) (unit | BYTE_VALUE[offSet]);
35         }else {
36             this.biteMap[unitIndex] = (byte) (unit & (~BYTE_VALUE[offSet]));
37         }
38     }
39 }

透过位操作与,或以及移动达成上述七个接口

余下的接口基于以上完成的:

  1 public void setByte(int start, int end, int flag) {
  2     for(int i = start ; i <= end; ++i) {
  3         this.setByte(i, flag);
  4     }
  5 }
  6     
  7 public int getSize() {
  8     return size;
  9 }
 10 
 11 public void setSize(int size) {
 12     this.size = size;
 13 }
 14     
 15 public int countOne() {
 16     return this.countOne(0, this.size - 1);
 17 }
 18     
 19 public int countOne(int start, int end) {
 20     int count = 0;
 21     for(int i = start; i <= end; i++) {
 22         count += this.getByte(i);
 23     }
 24     return count;
 25 }
 26     
 27 public ByteMap subMap(int start, int end) {
 28     ByteMap byteMap = new ByteMapImpl(end - start + 1);
 29     for(int i = start; i <= end; ++i) {
 30         if(this.getByte(i) != 0) {
 31             byteMap.setByte(i - start, 1);
 32         }
 33     }
 34     return byteMap;
 35 }
 36     
 37 public  ByteMap not() {
 38     ByteMap byteMap = new ByteMapImpl(this.size);
 39     for(int i = 0; i < this.size; ++i) {
 40         int flag = (this.getByte(i) == 0) ? 1 : 0;
 41         byteMap.setByte(i, flag);
 42     }
 43     return byteMap;
 44 }
 45     
 46 public ByteMap or(ByteMap byteMap) {
 47     int s1 = this.size;
 48     int s2 = byteMap.getSize();
 49     int orSize = s1 > s2 ? s1 : s2;
 50     ByteMap orMap = new ByteMapImpl(orSize);
 51     int i = 0;
 52     while(i < s1 && i < s2) {
 53         if(this.getByte(i) != 0 || byteMap.getByte(i) != 0) {
 54             orMap.setByte(i, 1);
 55         }
 56         ++i;
 57     }
 58     while(i < s1) {
 59         if(this.getByte(i) != 0) {
 60             orMap.setByte(i, 1);
 61         }
 62         ++i;
 63     }
 64     while(i < s2) {
 65         if(byteMap.getByte(i) != 0) {
 66             orMap.setByte(i, 1);
 67         }
 68         ++i;
 69     }
 70     return orMap;
 71 }
 72     
 73 public ByteMap xor(ByteMap byteMap) {
 74     int s1 = this.size;
 75     int s2 = byteMap.getSize();
 76     int xorSize = s1 > s2 ? s1 : s2;
 77     ByteMap xorMap = new ByteMapImpl(xorSize);
 78     int i = 0;
 79     while(i < s1 && i < s2) {
 80         if(this.getByte(i) !=  byteMap.getByte(i)) {
 81             xorMap.setByte(i, 1);
 82         }
 83         ++i;
 84     }
 85     while(i < s1) {
 86         if(this.getByte(i) != 0) {
 87             xorMap.setByte(i, 1);
 88         }
 89         ++i;
 90     }
 91     while(i < s2) {
 92         if(byteMap.getByte(i) != 0) {
 93             xorMap.setByte(i, 1);
 94         }
 95         ++i;
 96     }
 97     return xorMap;
 98 }
 99     
100 public ByteMap and(ByteMap byteMap) {
101     int s1 = this.size;
102     int s2 = byteMap.getSize();
103     int orSize = s1 > s2 ? s1 : s2;
104     ByteMap andMap = new ByteMapImpl(orSize);
105     int i = 0;
106     while(i < s1 && i < s2) {
107         if(this.getByte(i) != 0 && byteMap.getByte(i) != 0) {
108             andMap.setByte(i, 1);
109         }
110         ++i;
111     }
112     return andMap;
113 }
114     
115 public ByteMap leftShift(int shiftStep) {
116     ByteMap shiftMap = new ByteMapImpl(this.size);
117     if(this.countOne() > 0 && shiftStep < this.size) {
118         if(shiftStep < 0) {
119             return this.rightShift((0 - shiftStep));
120         }else {
121             for(int i = shiftStep; i < this.size; ++i) {
122                 if(this.getByte(i) != 0) {
123                     shiftMap.setByte(i - shiftStep, 1);
124                 }
125             }
126         }
127     }
128     return shiftMap;
129 }
130     
131 public ByteMap rightShift(int shiftStep) {
132     ByteMap shiftMap = new ByteMapImpl(this.size);
133     if(this.countOne() > 0 && shiftStep < this.size) {
134         if(shiftStep < 0) {
135             return this.leftShift((0 - shiftStep));
136         }else {
137             for(int i = this.size - shiftStep - 1; i >= 0; --i) {
138                 if(this.getByte(i) != 0) {
139                     shiftMap.setByte(i + shiftStep, 1);
140                 }
141             }
142         }
143     }
144     return shiftMap;
145 }

科学的做法是那样的

二 、位图方案。例如正如《编程珠玑》一书上所述,用二个十几个人长的位字符串来表示3个存有因素都自愧不如20的简约的非负整数集合,边框用如下字符串来表示集合{1,2,3,5,8,13}:
0 1 1 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0
上述集合中各数对应的地点则置1,没有相应的数的岗位则置0。

正确的做法是这样的

4、测试

为了便于测试,重写Object类型的toString方法:

 1 @Override
 2 public String toString() {
 3     StringBuilder sb = new StringBuilder();
 4     if(this.size <= 8) {
 5         for(int i = 0; i < 8; ++i) {
 6             if(i < 7) {
 7                 try {
 8                     sb.append(this.getByte(i) + ",");
 9                 } catch (IllegalArgumentException e) {
10                     sb.append("0,");
11                 }
12             }else {
13                 try {
14                     sb.append(this.getByte(i));
15                 } catch (IllegalArgumentException e) {
16                     sb.append("0");
17                 }
18             }
19         }
20     }else {
21         for(int i = 0; i < this.biteMap.length*8; ++i) {
22             if((i&7) == 0) {
23                 sb.append("\n" + (i>>3) + ":");
24             }else {
25                 sb.append(",");
26             }
27             try {
28                 sb.append(this.getByte(i));
29             } catch (IllegalArgumentException e) {
30                 sb.append("0");
31             }
32         }
33     }
34     return sb.toString();
35 }

测试main函数:

 1 public static void main(String[] args) {
 2     ByteMap byteMap1 = new ByteMapImpl(60);
 3     byteMap1.setByte(3, 5, 1);
 4     System.out.println("byteMap1:" + byteMap1.toString());
 5     System.out.println("byteMap1 count 1:" + byteMap1.countOne());
 6     ByteMap byteMap2 = byteMap1.subMap(1, 11);
 7     System.out.println("byteMap1 sub map:" + byteMap2);
 8     System.out.println("byteMap1 sub map count 1:" + byteMap2.countOne());
 9     System.out.println("or map:" + byteMap1.or(byteMap2));
10     System.out.println("and map:" + byteMap1.and(byteMap2));
11     System.out.println("xor map:" + byteMap1.xor(byteMap2));
12     System.out.println("byteMap1 left shift 3 bit:" + byteMap1.leftShift(3));
13     System.out.println("byteMap1 right shift 3 bite:" + byteMap1.rightShift(3));
14 }

结果:

byteMap1:
0:0,0,0,1,1,1,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
byteMap1 count 1:3
byteMap1 sub map:
0:0,0,1,1,1,0,0,0
1:0,0,0,0,0,0,0,0
byteMap1 sub map count one:3
or map:
0:0,0,1,1,1,1,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
and map:
0:0,0,0,1,1,0,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
xor map:
0:0,0,1,0,0,1,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
byteMap1 left shift 3 bit:
0:1,1,1,0,0,0,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
byteMap1 right shift 3 bite:
0:0,0,0,0,0,0,1,1
1:1,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
struct { BITMAPINFO info; RGBQUAD moreColors[255]; } fbi;
BITMAPINFOHEADER &bi = fbi.info.bmiHeader;
bi.biSize = sizeof(BITMAPINFOHEADER);
...
GetDIBits(..., &fbi.info, ...);

参照《编程珠玑》一书上的位图方案,针对10^7个数据量的磁盘文件排序难题,可以那样考虑,由于各类柒个人十进制整数表示3个紧跟于一千万的平头。可以应用贰个有所1000万个位的字符串来代表这一个文件,其中,当且仅当整数i在文书中留存时,第i位为1。采纳那一个位图的方案是因为大家面对的那几个题材的特殊性:一 、输入数据限制在相对较小的限量内,② 、数据尚未重新,③ 、其中的每条记下都以纯净的正整数,没有其余其余与之提到的数据。
故而,此题材用位图的方案分为以下三步举行缓解:
第②步,将装有的位都置为0,从而将聚集起来化为空。
其次步,通过读入文件中的各种整数来建立汇集,将每种对应的位都置为1。
其三步,检验每1人,若是该位为1,就输出对应的平头。
透过以上三步后,暴发有序的出口文件。令n为位图向量中的位数(本例中为一千0000),程序可以用伪代码表示如下:

struct { BITMAPINFO info; RGBQUAD moreColors[255]; } fbi;
BITMAPINFOHEADER &bi = fbi.info.bmiHeader;
bi.biSize = sizeof(BITMAPINFOHEADER);
...
GetDIBits(..., &fbi.info, ...);

5、总结

  使用位图适合稠密数据,不然会招致大气空中的荒废

 

//第一步,将所有的位都初始化为0   
for i ={0,....n}  
    bit[i]=0;  

//第二步,通过读入文件中的每个整数来建立集合,将每个对应的位都置为1。   
for each i in the input file  
    bit[i]=1;  

//第三步,检验每一位,如果该位为1,就输出对应的整数。   
for i={0...n}  
    if bit[i]==1  
      write i on the output file  

 

6、思考

  (1)为什么 BYTE_VALUE[7] = -0x0080;

  (2)完结的位图是非线程安全的,怎么着立异既能保险线程安全,又不会大幅下落质量;

 

然则很快,大家就将发现到,用此位图方法,严俊说来依旧不太行,空间消耗10^7/8依旧超出1M(1M=1024*1024空间,小于10^7/8)。
既然如此倘若用位图方案以来,大家须求约1.25MB(若每条记下是5个人的正整数的话,则一千0000/(1024*1024*8)
~=
1.2M)的上空,而前些天唯有1MB的可用存储空间,那么终究该作何处理吧?能够频仍施用位图举办排序。

源码参考:

③ 、多路归并。把这么些文件分为若干尺寸的几块,然后分别对每一块举行排序,最终成功全套经过的排序。k趟算法可以在kn的年华支付内和n/k的空间开发内成功对最多n个小于n的无重复正整数的排序。比如可分为2块(k=2,1趟左右占用的内存唯有1.25/2=0.625M),1~4999999,和5000000~9999999先遍历一趟,处理1澳门金沙国际,~4999999的数据块(用五千000/8=62伍仟个字的贮存空间来排序0~4999999之内的整数),然后再第一趟,对四千001~1000000这一数据块拍卖。

本着那一个要分两趟给磁盘文件排序的实际难题编写完整代码,如下:

澳门金沙国际 2澳门金沙国际 3View Code

//copyright@ yansha     
//July、2010.05.30。     
//位图方案解决10^7个数据量的文件的排序问题     
//如果有重复的数据,那么只能显示其中一个 其他的将被忽略     
#include <iostream>     
#include <bitset>     
#include <assert.h>     
#include <time.h>     
using namespace std;    

const int max_each_scan = 5000000;    

int main()    
{    
    clock_t begin = clock();    
    bitset<max_each_scan> bit_map;    
    bit_map.reset();    

    // open the file with the unsorted data     
    FILE *fp_unsort_file = fopen("data.txt", "r");    
    assert(fp_unsort_file);    
    int num;    

    // the first time scan to sort the data between 0 - 4999999     
    while (fscanf(fp_unsort_file, "%d ", &num) != EOF)    
    {    
        if (num < max_each_scan)    
            bit_map.set(num, 1);    
    }    

    FILE *fp_sort_file = fopen("sort.txt", "w");    
    assert(fp_sort_file);    
    int i;    

    // write the sorted data into file     
    for (i = 0; i < max_each_scan; i++)    
    {    
        if (bit_map[i] == 1)    
            fprintf(fp_sort_file, "%d ", i);    
    }    

    // the second time scan to sort the data between 5000000 - 9999999     
    int result = fseek(fp_unsort_file, 0, SEEK_SET);    
    if (result)    
        cout << "fseek failed!" << endl;    
    else    
    {    
        bit_map.reset();    
        while (fscanf(fp_unsort_file, "%d ", &num) != EOF)    
        {    
            if (num >= max_each_scan && num < 10000000)    
            {    
                num -= max_each_scan;    
                bit_map.set(num, 1);    
            }    
        }    
        for (i = 0; i < max_each_scan; i++)    
        {    
            if (bit_map[i] == 1)    
                fprintf(fp_sort_file, "%d ", i + max_each_scan);    
        }    
    }    

    clock_t end = clock();    
    cout<<"用位图的方法,耗时:"<<endl;    
    cout << (end - begin) / CLK_TCK << "s" << endl;    
    fclose(fp_sort_file);    
    fclose(fp_unsort_file);    
    return 0;    
}    

上述的位图方案,共必要扫描输入数据三遍,具体实践步骤如下:
先是次,只处理1—4999999时期的数据,这一个数都以稍差于伍仟000的,对这个数举行位图排序,只须求约四千000/8=625000Byte,约等于0.625M,排序后输出。
其次次,扫描输入文件时,只处理4999999-一千0000的数量项,也只要求0.625M(可以应用第两回拍卖申请的内存)。由此,总共也只需求0.625M。

磁盘文件排序的C完结

1、内排序
由于须要的可用内存为1MB,那么每一次可以在内存中对250K的数码进行排序,然后将逐步的数写入硬盘。
那就是说10M的数量要求循环三十八回,最后爆发肆拾个静止的公文。

二 、多路归并排序
(1)将各种文件最初始的数读入(由于有序,所以为该文件最小数),存放在一个轻重为40的first_data数组中;
(2)选择first_data数组中细小的数min_data,及其对应的文件索引index;
(3)将first_data数组中小小的的数写入文件result,然后更新数组first_data(依据index读取该公文下二个数代替min_data);
(4)判断是还是不是具有数据都读取完成,否则重返(2)。

全部代码如下:

澳门金沙国际 4澳门金沙国际 5View Code

//copyright@ yansha   
//July、updated,2011.05.28。   
#include <iostream>   
#include <string>   
#include <algorithm>   
#include <time.h>   
using namespace std;  

int sort_num = 10000000;  
int memory_size = 250000;  //每次只对250k个小数据量进行排序   

int read_data(FILE *fp, int *space)  
{  
    int index = 0;  
    while (index < memory_size && fscanf(fp, "%d ", &space[index]) != EOF)  
        index++;  

    return index;  
}  

void write_data(FILE *fp, int *space, int num)  
{  
    int index = 0;  
    while (index < num)  
    {  
        fprintf(fp, "%d ", space[index]);  
        index++;  
    }  
}  

// check the file pointer whether valid or not.   
void check_fp(FILE *fp)  
{  
    if (fp == NULL)  
    {  
        cout << "The file pointer is invalid!" << endl;  
        exit(1);  
    }  
}  

int compare(const void *first_num, const void *second_num)  
{  
    return *(int *)first_num - *(int *)second_num;  
}  

string new_file_name(int n)  
{  
    char file_name[20];  
    sprintf(file_name, "data%d.txt", n);  
    return file_name;  
}  

int memory_sort()  
{  
    // open the target file.   
    FILE *fp_in_file = fopen("data.txt", "r");  
    check_fp(fp_in_file);  

    int counter = 0;  
    while (true)  
    {  
        // allocate space to store data read from file.   
        int *space = new int[memory_size];  
        int num = read_data(fp_in_file, space);  

        // the memory sort have finished if not numbers any more.   
        if (num == 0)  
            break;  

        // quick sort.   
        qsort(space, num, sizeof(int), compare);  

        // create a new auxiliary file name.   
        string file_name = new_file_name(++counter);  
        FILE *fp_aux_file = fopen(file_name.c_str(), "w");  
        check_fp(fp_aux_file);  

        // write the orderly numbers into auxiliary file.   
        write_data(fp_aux_file, space, num);  

        fclose(fp_aux_file);  
        delete []space;  
    }  
    fclose(fp_in_file);  

    // return the number of auxiliary files.   
    return counter;  
}  

void merge_sort(int file_num)  
{  
    if (file_num <= 0)  
        return;  

    // create a new file to store result.   
    FILE *fp_out_file = fopen("result.txt", "w");  
    check_fp(fp_out_file);  

    // allocate a array to store the file pointer.   
    FILE **fp_array = new FILE *[file_num];  

    int i;  
    for (i = 0; i < file_num; i++)  
    {  
        string file_name = new_file_name(i + 1);  
        fp_array[i] = fopen(file_name.c_str(), "r");  
        check_fp(fp_array[i]);  
    }  

    int *first_data = new int[file_num]; //new出个大小为0.1亿/250k数组,由指针first_data指示数组首地址   

    bool *finish = new bool[file_num];  
    memset(finish, false, sizeof(bool) * file_num);  

    // read the first number of every auxiliary file.   
    for (i = 0; i < file_num; i++)  
        fscanf(fp_array[i], "%d ", &first_data[i]);  

    while (true)  
    {  
        int index = 0;  
        while (index < file_num && finish[index])  
            index++;  

        // the finish condition of the merge sort.   
        //要保证所有文件都读完,必须使得finish[0]...finish[40]都为真   
        if (index >= file_num)  
            break;  

        int min_data = first_data[index];  

        // choose the relative minimum in the array of first_data.   
        for (i = index + 1; i < file_num; i++)  
        {  
            if (min_data > first_data[i] && !finish[i])//比min_data更小的数据first_data[i]   
            {  
                min_data = first_data[i];    //则置min_data<-first_data[i]   
                index = i;                   //把下标i 赋给index。   
            }  
        }  

        // write the orderly result to file.   
        fprintf(fp_out_file, "%d ", min_data);  

        if (fscanf(fp_array[index], "%d ", &first_data[index]) == EOF)  
            finish[index] = true;  
    }  

    fclose(fp_out_file);  

    delete []finish;  
    delete []first_data;  

    for (i = 0; i < file_num; i++)  
        fclose(fp_array[i]);  

    delete [] fp_array;  
}  

int main()  
{  
    clock_t start_memory_sort = clock();  
    int aux_file_num = memory_sort();  
    clock_t end_memory_sort = clock();  
    cout << "The time needs in memory sort: " << end_memory_sort - start_memory_sort << endl;  

    clock_t start_merge_sort = clock();  
    merge_sort(aux_file_num);  
    clock_t end_merge_sort = clock();  
    cout << "The time needs in merge sort: " << end_merge_sort - start_merge_sort << endl;  
    system("pause");  
    return 0;  
}  

测试数据:生成1000万个不另行的正整数

澳门金沙国际 6澳门金沙国际 7View Code

//生成随机的不重复的测试数据   
#include <iostream>   
#include <time.h>   
#include <assert.h>   
using namespace std;  

//产生[l,u]区间的随机数   
int randint(int l, int u)  
{  
 return l+(RAND_MAX*rand()+rand())%(u-l+1);  
}  

 //1000W的int,大约4M的数据,如果放在mian内,在我的机子上好像是栈溢出了,放在全局空间就没问题   
const int size = 10000000;  
int num[size];  

int main()  
{  
    int i, j;  
    FILE *fp = fopen("data.txt", "w");  
    assert(fp);  
    for (i = 0; i < size; i++)  
        num[i] = i+1;  
    srand((unsigned)time(NULL));  
    for (i = 0; i < size; i++)  
    {  
        j = randint(i, size-1);  
        int t = num[i]; num[i] = num[j]; num[j] = t;  
        //swap(num[i], num[j]);   
    }  
    for (i = 0; i < size; i++)  
        fprintf(fp, "%d ", num[i]);  
    fclose(fp);  
    return 0;  
}  

 

 

相关文章