Skip to content

Commit cec3176

Browse files
author
Ahmed Eldawy
committed
Speed up deserializing doubles from Text
1 parent a6ef14f commit cec3176

File tree

1 file changed

+65
-3
lines changed

1 file changed

+65
-3
lines changed

src/core/org/apache/hadoop/io/TextSerializerHelper.java

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ public static long deserializeHexLong(byte[] buf, int offset, int len) {
121121
* Deserializes and consumes a long from the given text. Consuming means all
122122
* characters read for deserialization are removed from the given text.
123123
* If separator is non-zero, a long is read and consumed up to the first
124-
* occurence of this separator. The separator is also consumed.
124+
* occurrence of this separator. The separator is also consumed.
125125
* @param text
126126
* @param separator
127127
* @return
@@ -142,12 +142,74 @@ public static long consumeHexLong(Text text, char separator) {
142142
return l;
143143
}
144144

145+
enum DoubleParseState {BeforeDecimal, AfterDecimal, AfterExp};
146+
public static double deserializeDouble(byte[] buf, int offset, int len) {
147+
DoubleParseState state = DoubleParseState.BeforeDecimal;
148+
149+
int exponent1 = 0; // Exponent part coming from the decimal point
150+
boolean exponent2Negative = false;
151+
int exponent2 = 0; // Exponent part written explicitly (e.g., E+32)
152+
153+
boolean mantissaNegative = false;
154+
long mantissa = 0;
155+
156+
while (len-- > 0) {
157+
if (buf[offset] >= '0' && buf[offset] <= '9') {
158+
switch(state) {
159+
case AfterDecimal:
160+
exponent1--;
161+
// Fall through
162+
case BeforeDecimal:
163+
mantissa = mantissa * 10 + (buf[offset] - '0');
164+
break;
165+
case AfterExp:
166+
exponent2 = exponent2 * 10 + (buf[offset] - '0');
167+
break;
168+
}
169+
} else if (buf[offset] == '.') {
170+
state = DoubleParseState.AfterDecimal;
171+
} else if (buf[offset] == 'e' || buf[offset] == 'E') {
172+
state = DoubleParseState.AfterExp;
173+
} else if (buf[offset] == '-') {
174+
if (state == DoubleParseState.BeforeDecimal) {
175+
mantissaNegative = true;
176+
} else if (state == DoubleParseState.AfterExp) {
177+
exponent2Negative = true;
178+
} else {
179+
throw new RuntimeException("Error parsing double "+
180+
new String(buf, offset, len)+" at position "+offset);
181+
}
182+
} else if (buf[offset] == '+') {
183+
// Just skip. The default sign is positive
184+
} else {
185+
throw new RuntimeException("Error parsing double "+
186+
new String(buf, offset, len)+" at position "+offset);
187+
}
188+
offset++;
189+
}
190+
191+
if (mantissaNegative)
192+
mantissa = -mantissa;
193+
if (exponent2Negative)
194+
exponent2 = -exponent2;
195+
196+
int exponent = exponent1 + exponent2;
197+
double d = mantissa;
198+
if (exponent > 0) {
199+
while (exponent-- != 0)
200+
d *= 10;
201+
} else if (exponent < 0) {
202+
while (exponent++ != 0)
203+
d /= 10;
204+
}
205+
return d;
206+
}
145207

146208
/**
147209
* Deserializes and consumes a double from the given text. Consuming means all
148210
* characters read for deserialization are removed from the given text.
149211
* If separator is non-zero, a double is read and consumed up to the first
150-
* occurence of this separator. The separator is also consumed.
212+
* occurrence of this separator. The separator is also consumed.
151213
* @param text
152214
* @param separator
153215
* @return
@@ -158,7 +220,7 @@ public static double consumeDouble(Text text, char separator) {
158220
// Skip until the separator or end of text
159221
while (i < text.getLength() && bytes[i] != separator)
160222
i++;
161-
double d = Double.parseDouble(new String(bytes, 0, i));
223+
double d = deserializeDouble(bytes, 0, i);
162224
if (i < text.getLength())
163225
i++;
164226
System.arraycopy(bytes, i, bytes, 0, text.getLength() - i);

0 commit comments

Comments
 (0)