1 /*
2 * PROJECT : DAR Runtime and Tools
3 * COPYRIGHT : Copyright (C) 1999-2004 tim.stephenson@enableit.org
4 * LICENSE : GNU LESSER GENERAL PUBLIC LICENSE
5 * Version 2.1, February 1999
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 package org.enableit.db.darrt;
22
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.io.LineNumberReader;
27 import java.util.ArrayList;
28 import java.util.Iterator;
29 import java.util.List;
30
31 import org.apache.log4j.Logger;
32 import org.apache.oro.text.perl.Perl5Util;
33 import org.enableit.db.DBException;
34 import org.enableit.db.SqlType;
35 import org.enableit.db.beans.Col;
36 import org.enableit.db.beans.Column;
37 import org.enableit.db.beans.Row;
38 import org.enableit.db.beans.RowSet;
39 import org.enableit.db.beans.Table;
40
41
42 /***
43 * @author tim.stephenson
44 */
45 public class CsvDataParser implements DataParser {
46 private static Logger logger = Logger.getLogger(CsvDataParser.class);
47
48 /***
49 * A Perl5 regular expression to parse a CSV line from the file.
50 */
51 private String CSV_PARSING_EXPR = "/,/";
52
53 //private String CSV_PARSING_EXPR = "/, NOT'*,*' NOT\"*,*\"/";
54 //private String CSV_PARSING_EXPR = "/, NOT '*,*' NOT \"*,*\"/";
55
56 /***
57 * Default constructor.
58 */
59 public CsvDataParser() {
60 }
61
62 /***
63 * Convert an input stream of CSV data for the specified table into a
64 * structured format of a <code>RowSet</code>.
65 * @param is Any <code>Reader</code> wrapper to input stream.
66 * @param table The name of the target Table for the RowSet.
67 * @return A RowSet instance holding the data from the InputStream.
68 * @throws IOException
69 * @throws DBException
70 */
71 public RowSet getRowSet(InputStream is, Table table)
72 throws IOException, DBException {
73 logger.info("METHOD_ENTRY: getRowSet");
74
75 Perl5Util perl5 = new Perl5Util();
76 RowSet rs = new RowSet();
77
78 rs.setTable(table.getName());
79
80 LineNumberReader lineReader = new LineNumberReader(new InputStreamReader(
81 is));
82
83 // Read and process each row
84 String line = lineReader.readLine();
85 List fields = new ArrayList();
86
87 do {
88 logger.debug("Loading line: " + line);
89
90 Row row = new Row();
91
92 fields.clear();
93
94 perl5.split(fields, CSV_PARSING_EXPR, line);
95 logger.warn("Fields found: " + fields);
96
97 // This occurs with embedded commas and missing surrogate keys
98 // if (table.getColumnCount() != fields.size()) {
99 // remove embedded commas
100 logger.debug("Removing embedded commas...");
101
102 List newFields = new ArrayList();
103 StringBuffer sb = new StringBuffer();
104
105 for (Iterator i = fields.iterator(); i.hasNext();) {
106 String field = (String) i.next();
107
108 logger.debug("Field: " + field);
109
110 String trimmedField = field.trim();
111
112 if ((trimmedField.startsWith("\"")
113 && !trimmedField.endsWith("\""))
114 || (trimmedField.startsWith("'")
115 && !trimmedField.endsWith("'"))) {
116 sb.append(field + ",");
117 } else if (trimmedField.endsWith("\"")
118 || trimmedField.endsWith("'")) {
119 sb.append(field);
120 newFields.add(sb.toString());
121 sb = new StringBuffer();
122 } else if (sb.length() > 0) {
123 // must have more than one embedded comma
124 sb.append(field + ",");
125 } else {
126 newFields.add(field);
127 }
128 }
129
130 fields = newFields;
131
132 // }
133 // account for pks, assume numeric key may be surrogate
134 int autoIncKeyCount = 0;
135
136 if (table.getColumnCount() != fields.size()) {
137 logger.debug("... account for autoincrement keys ...");
138
139 for (int i = 0; i < table.getColumnCount(); i++) {
140 Column col = table.getColumn(i);
141
142 if ("TRUE".equalsIgnoreCase(col.getPrimaryKey())
143 && col.getColType().startsWith("numeric")) {
144 logger.warn("Found potential auto-inc. surrogate key: "
145 + col.getColName());
146 autoIncKeyCount++;
147 }
148 }
149
150 if (autoIncKeyCount > 1) {
151 logger.warn("Found more than one potential auto-"
152 + "incrementing surrogate key, this is not usual "
153 + "practice...");
154 }
155 }
156
157 // retest
158 if ((table.getColumnCount() != fields.size())
159 && ((table.getColumnCount() - autoIncKeyCount) != fields
160 .size())) {
161 String msg = "Column defintions do not match no. of fields: "
162 + "table= " + table.getColumnCount() + " fields= "
163 + fields.size() + " line= " + line;
164
165 throw new DBException(msg);
166 }
167
168 Iterator it = fields.iterator();
169
170 for (int i = 0; i < table.getColumnCount(); i++) {
171 Column colDefn = table.getColumn(i);
172 Col col = new Col();
173
174 col.setName(colDefn.getColName());
175 col.setType(new SqlType(colDefn).getJavaType());
176
177 if ((autoIncKeyCount > 0)
178 && "TRUE".equalsIgnoreCase(colDefn.getPrimaryKey())
179 && colDefn.getColType().startsWith("numeric")) {
180 col.setValue("null");
181 } else if (it.hasNext()) {
182 String value = it.next().toString().trim();
183
184 if (value.startsWith("\"") || value.startsWith("'")) {
185 /*
186 * By this time must have matching start and end quotes
187 * that are not needed from here on
188 */
189 value = value.substring(1, value.length() - 1);
190 }
191
192 col.setValue(value);
193 } else {
194 col.setValue("null");
195 }
196
197 row.addCol(col);
198 }
199
200 rs.addRow(row);
201 line = lineReader.readLine();
202 } while (line != null);
203
204 logger.info("METHOD_EXIT: getRowSetCSV");
205
206 return rs;
207 }
208 }
This page was automatically generated by Maven