1.目前測試了20M的文件,可以讀取。

2.支持單個工作表1萬+的數據行數,耗時如圖。

3.以下是關鍵地方處理的代碼
1 //Accepts objects needed while parsing.
2 // @param styles Table of styles
3 // @param strings Table of shared strings
4 // @param cols Minimum number of columns to show
5 // @param target Sink for output
6 public MyXSSFSheetHandler(
7 StylesTable styles,
8 ReadOnlySharedStringsTable strings,
9 int cols,
10 PrintStream target) {
11 this.stylesTable = styles;
12 this.sharedStringsTable = strings;
13 this.minColumnCount = cols;
14 this.output = target;
15 this.value = new StringBuffer();
16 this.nextDataType = xssfDataType.NUMBER;
17 this.formatter = new DataFormatter();
18 rowlist = new ArrayList<String>(0);
19 rowReader = new RowReader();
20 rowMap = new HashMap<Integer, String>(0);
21 rowString = new StringBuffer();
22 }
23 // @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
24 public void startElement(String uri, String localName, String name,
25 Attributes attributes) throws SAXException
26 {
27
28
29 if ("inlineStr".equals(name) || "v".equals(name))
30 {
31 vIsOpen = true;
32 // Clear contents cache
33 value.setLength(0);
34 }
35 // c => cell
36 else if ("c".equals(name))
37 {
38 // Get the cell reference
39 String r = attributes.getValue("r");
40 int firstDigit = -1;
41 for (int c = 0; c < r.length(); ++c)
42 {
43 if (Character.isDigit(r.charAt(c)))
44 {
45 firstDigit = c;
46 break;
47 }
48 }
49 thisColumn = nameToColumn(r.substring(0, firstDigit));
50
51 // Set up defaults.
52 this.nextDataType = xssfDataType.NUMBER;
53 this.formatIndex = -1;
54 this.formatString = null;
55 String cellType = attributes.getValue("t");
56 String cellStyleStr = attributes.getValue("s");
57 if ("b".equals(cellType))
58 nextDataType = xssfDataType.BOOL;
59 else if ("e".equals(cellType))
60 nextDataType = xssfDataType.ERROR;
61 else if ("inlineStr".equals(cellType))
62 nextDataType = xssfDataType.INLINESTR;
63 else if ("s".equals(cellType))
64 nextDataType = xssfDataType.SSTINDEX;
65 else if ("str".equals(cellType))
66 nextDataType = xssfDataType.FORMULA;
67 else if (cellStyleStr != null) {
68 // It's a number, but almost certainly one
69 // with a special style or format
70 int styleIndex = Integer.parseInt(cellStyleStr);
71 XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
72 this.formatIndex = style.getDataFormat();
73 this.formatString = style.getDataFormatString();
74 if (this.formatString == null)
75 this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
76 }
77 }
78
79 }
80
81
82 // @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
83 public void endElement(String uri, String localName, String name)
84 throws SAXException
85 {
86
87 String thisStr = null;
88
89 // v => contents of a cell
90 if ("v".equals(name))
91 {
92 // Process the value contents as required.
93 // Do now, as characters() may be called more than once
94 switch (nextDataType) {
95
96 case BOOL:
97 char first = value.charAt(0);
98 thisStr = first == '0' ? "FALSE" : "TRUE";
99 break;
100
101 case ERROR:
102 thisStr = "\"ERROR:" + value.toString() + '"';
103 break;
104
105 case FORMULA:
106 // A formula could result in a string value,
107 // so always add double-quote characters.
108 thisStr = '"' + value.toString() + '"';
109 break;
110
111 case INLINESTR:
112 // TODO: have seen an example of this, so it's untested.
113 XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
114 thisStr = '"' + rtsi.toString() + '"';
115 break;
116
117 case SSTINDEX:
118 String sstIndex = value.toString();
119 try {
120 int idx = Integer.parseInt(sstIndex);
121 XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
122 thisStr = '"' + rtss.toString() + '"';
123 } catch (NumberFormatException ex) {
124 output.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString());
125 }
126 break;
127
128 case NUMBER:
129 String n = value.toString();
130 if (this.formatString != null)
131 thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
132 else
133 thisStr = n;
134 break;
135
136 default:
137 thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
138 break;
139 }
140
141 // Output after we've seen the string contents
142 // Emit commas for any fields that were missing on this row
143 if (lastColumnNumber == -1)
144 {
145 lastColumnNumber = 0;
146 }
147 for (int i = lastColumnNumber; i < thisColumn; ++i)
148 {
149 rowString.append(',');//每天加一個單元格的值到字符串中就追加一個逗號(末尾不添加)
150 //output.print(','); 可以看到使用output是可以將每一個單元格使用逗號分割
//但是如果使用rowlist添加到列表中,卻始終無法得到空單元格的內容
//也就是說:空單元格被忽略了。
//具體請參照標紅的地方進行處理:使用字符串拼接的方式獲得完整的行數據,再使用逗號拆分組合成rowMap
151 }
152 rowString.append(thisStr);// 這條code放在for後面,如果放在前面,會導致0和1兩個單元格合為一個單元格。
153 // Might be the empty string.
154 //output.print(thisStr);
155 rowlist.add(thisStr);
156 // Update column
157 if (thisColumn > -1)
158 {
159 lastColumnNumber = thisColumn;
160 }
161 rowIndex++;
162 }
163 else if ("row".equals(name))
164 {
165
166 // Print out any missing commas if needed
167 if (minColumns > 0)
168 {
169 // Columns are 0 based
170 if (lastColumnNumber == -1)
171 {
172 lastColumnNumber = 0;
173 }
174 for (int i = lastColumnNumber; i < (this.minColumnCount); i++)
175 {
176 output.print(',');
177 }
178 }
179
180 // We're onto a new row
181
182 output.println();
183 output.println(countrows++);
184 lastColumnNumber = -1;
185 rowIndex = 0;
186 //rowMap = rowReader.getRowMap(rowlist);
187 rowMap = rowReader.getRowMapByString(rowString.toString());
188 // ADD =
189 rowLst1000.add(rowMap);
190 rowMap = null;
191 rowMap = new HashMap<Integer, String>(0);
192 if (countrows % 1000 == 0)
193 {
194 rowLst1000n.add(rowLst1000);
195 rowLst1000 = null;
196 rowLst1000 = new ArrayList<Map<Integer, String>>(0);
197 }
198 rowlist.clear();
199 System.out.println(rowString.toString());
200 rowString = null;
201 rowString = new StringBuffer();
202 }
203 }
以上是我自己的處理方式,當然還有其他的處理方式,再研究吧。畢竟寫到此處的時候,我不過是一個不到1年經驗的小菜鳥。
下面附上其余代碼的參照地址:
http://www.360sdn.com/java/2014/0524/3392.html
【做而會】