001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.csv; 019 020import static org.apache.commons.csv.Token.Type.TOKEN; 021 022import java.io.Closeable; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.InputStreamReader; 027import java.io.Reader; 028import java.io.StringReader; 029import java.io.UncheckedIOException; 030import java.net.URL; 031import java.nio.charset.Charset; 032import java.nio.file.Files; 033import java.nio.file.Path; 034import java.util.ArrayList; 035import java.util.Arrays; 036import java.util.Collections; 037import java.util.Iterator; 038import java.util.LinkedHashMap; 039import java.util.List; 040import java.util.Map; 041import java.util.NoSuchElementException; 042import java.util.Objects; 043import java.util.Spliterator; 044import java.util.Spliterators; 045import java.util.TreeMap; 046import java.util.stream.Collectors; 047import java.util.stream.Stream; 048import java.util.stream.StreamSupport; 049 050import org.apache.commons.io.function.Uncheck; 051 052/** 053 * Parses CSV files according to the specified format. 054 * 055 * Because CSV appears in many different dialects, the parser supports many formats by allowing the 056 * specification of a {@link CSVFormat}. 057 * 058 * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream. 059 * 060 * <h2>Creating instances</h2> 061 * <p> 062 * There are several static factory methods that can be used to create instances for various types of resources: 063 * </p> 064 * <ul> 065 * <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li> 066 * <li>{@link #parse(String, CSVFormat)}</li> 067 * <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li> 068 * </ul> 069 * <p> 070 * Alternatively parsers can also be created by passing a {@link Reader} directly to the sole constructor. 071 * 072 * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut: 073 * </p> 074 * <pre> 075 * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) { 076 * ... 077 * } 078 * </pre> 079 * 080 * <h2>Parsing record wise</h2> 081 * <p> 082 * To parse a CSV input from a file, you write: 083 * </p> 084 * 085 * <pre> 086 * File csvData = new File("/path/to/csv"); 087 * CSVParser parser = CSVParser.parse(csvData, CSVFormat.RFC4180); 088 * for (CSVRecord csvRecord : parser) { 089 * ... 090 * } 091 * </pre> 092 * 093 * <p> 094 * This will read the parse the contents of the file using the 095 * <a href="https://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format. 096 * </p> 097 * 098 * <p> 099 * To parse CSV input in a format like Excel, you write: 100 * </p> 101 * 102 * <pre> 103 * CSVParser parser = CSVParser.parse(csvData, CSVFormat.EXCEL); 104 * for (CSVRecord csvRecord : parser) { 105 * ... 106 * } 107 * </pre> 108 * 109 * <p> 110 * If the predefined formats don't match the format at hand, custom formats can be defined. More information about 111 * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. 112 * </p> 113 * 114 * <h2>Parsing into memory</h2> 115 * <p> 116 * If parsing record-wise is not desired, the contents of the input can be read completely into memory. 117 * </p> 118 * 119 * <pre> 120 * Reader in = new StringReader("a;b\nc;d"); 121 * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL); 122 * List<CSVRecord> list = parser.getRecords(); 123 * </pre> 124 * 125 * <p> 126 * There are two constraints that have to be kept in mind: 127 * </p> 128 * 129 * <ol> 130 * <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from 131 * the input, those records will not end up in the in-memory representation of your CSV data.</li> 132 * <li>Parsing into memory may consume a lot of system resources depending on the input. For example, if you're 133 * parsing a 150MB file of CSV data the contents will be read completely into memory.</li> 134 * </ol> 135 * 136 * <h2>Notes</h2> 137 * <p> 138 * The internal parser state is completely covered by the format and the reader state. 139 * </p> 140 * 141 * @see <a href="package-summary.html">package documentation for more details</a> 142 */ 143public final class CSVParser implements Iterable<CSVRecord>, Closeable { 144 145 final class CSVRecordIterator implements Iterator<CSVRecord> { 146 private CSVRecord current; 147 148 private CSVRecord getNextRecord() { 149 return Uncheck.get(CSVParser.this::nextRecord); 150 } 151 152 @Override 153 public boolean hasNext() { 154 if (CSVParser.this.isClosed()) { 155 return false; 156 } 157 if (current == null) { 158 current = getNextRecord(); 159 } 160 161 return current != null; 162 } 163 164 @Override 165 public CSVRecord next() { 166 if (CSVParser.this.isClosed()) { 167 throw new NoSuchElementException("CSVParser has been closed"); 168 } 169 CSVRecord next = current; 170 current = null; 171 172 if (next == null) { 173 // hasNext() wasn't called before 174 next = getNextRecord(); 175 if (next == null) { 176 throw new NoSuchElementException("No more CSV records available"); 177 } 178 } 179 180 return next; 181 } 182 183 @Override 184 public void remove() { 185 throw new UnsupportedOperationException(); 186 } 187 } 188 189 /** 190 * Header information based on name and position. 191 */ 192 private static final class Headers { 193 194 /** 195 * Header column positions (0-based) 196 */ 197 final Map<String, Integer> headerMap; 198 199 /** 200 * Header names in column order 201 */ 202 final List<String> headerNames; 203 204 Headers(final Map<String, Integer> headerMap, final List<String> headerNames) { 205 this.headerMap = headerMap; 206 this.headerNames = headerNames; 207 } 208 } 209 210 /** 211 * Creates a parser for the given {@link File}. 212 * 213 * @param file 214 * a CSV file. Must not be null. 215 * @param charset 216 * The Charset to decode the given file. 217 * @param format 218 * the CSVFormat used for CSV parsing. Must not be null. 219 * @return a new parser 220 * @throws IllegalArgumentException 221 * If the parameters of the format are inconsistent or if either file or format are null. 222 * @throws IOException 223 * If an I/O error occurs 224 */ 225 public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException { 226 Objects.requireNonNull(file, "file"); 227 return parse(file.toPath(), charset, format); 228 } 229 230 /** 231 * Creates a CSV parser using the given {@link CSVFormat}. 232 * 233 * <p> 234 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 235 * unless you close the {@code reader}. 236 * </p> 237 * 238 * @param inputStream 239 * an InputStream containing CSV-formatted input. Must not be null. 240 * @param charset 241 * The Charset to decode the given file. 242 * @param format 243 * the CSVFormat used for CSV parsing. Must not be null. 244 * @return a new CSVParser configured with the given reader and format. 245 * @throws IllegalArgumentException 246 * If the parameters of the format are inconsistent or if either reader or format are null. 247 * @throws IOException 248 * If there is a problem reading the header or skipping the first record 249 * @since 1.5 250 */ 251 @SuppressWarnings("resource") 252 public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format) 253 throws IOException { 254 Objects.requireNonNull(inputStream, "inputStream"); 255 Objects.requireNonNull(format, "format"); 256 return parse(new InputStreamReader(inputStream, charset), format); 257 } 258 259 /** 260 * Creates and returns a parser for the given {@link Path}, which the caller MUST close. 261 * 262 * @param path 263 * a CSV file. Must not be null. 264 * @param charset 265 * The Charset to decode the given file. 266 * @param format 267 * the CSVFormat used for CSV parsing. Must not be null. 268 * @return a new parser 269 * @throws IllegalArgumentException 270 * If the parameters of the format are inconsistent or if either file or format are null. 271 * @throws IOException 272 * If an I/O error occurs 273 * @since 1.5 274 */ 275 @SuppressWarnings("resource") 276 public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException { 277 Objects.requireNonNull(path, "path"); 278 Objects.requireNonNull(format, "format"); 279 return parse(Files.newInputStream(path), charset, format); 280 } 281 282 /** 283 * Creates a CSV parser using the given {@link CSVFormat} 284 * 285 * <p> 286 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 287 * unless you close the {@code reader}. 288 * </p> 289 * 290 * @param reader 291 * a Reader containing CSV-formatted input. Must not be null. 292 * @param format 293 * the CSVFormat used for CSV parsing. Must not be null. 294 * @return a new CSVParser configured with the given reader and format. 295 * @throws IllegalArgumentException 296 * If the parameters of the format are inconsistent or if either reader or format are null. 297 * @throws IOException 298 * If there is a problem reading the header or skipping the first record 299 * @since 1.5 300 */ 301 public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException { 302 return new CSVParser(reader, format); 303 } 304 305 /** 306 * Creates a parser for the given {@link String}. 307 * 308 * @param string 309 * a CSV string. Must not be null. 310 * @param format 311 * the CSVFormat used for CSV parsing. Must not be null. 312 * @return a new parser 313 * @throws IllegalArgumentException 314 * If the parameters of the format are inconsistent or if either string or format are null. 315 * @throws IOException 316 * If an I/O error occurs 317 */ 318 public static CSVParser parse(final String string, final CSVFormat format) throws IOException { 319 Objects.requireNonNull(string, "string"); 320 Objects.requireNonNull(format, "format"); 321 322 return new CSVParser(new StringReader(string), format); 323 } 324 325 /** 326 * Creates and returns a parser for the given URL, which the caller MUST close. 327 * 328 * <p> 329 * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless 330 * you close the {@code url}. 331 * </p> 332 * 333 * @param url 334 * a URL. Must not be null. 335 * @param charset 336 * the charset for the resource. Must not be null. 337 * @param format 338 * the CSVFormat used for CSV parsing. Must not be null. 339 * @return a new parser 340 * @throws IllegalArgumentException 341 * If the parameters of the format are inconsistent or if either url, charset or format are null. 342 * @throws IOException 343 * If an I/O error occurs 344 */ 345 @SuppressWarnings("resource") 346 public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException { 347 Objects.requireNonNull(url, "url"); 348 Objects.requireNonNull(charset, "charset"); 349 Objects.requireNonNull(format, "format"); 350 351 return new CSVParser(new InputStreamReader(url.openStream(), charset), format); 352 } 353 354 private String headerComment; 355 356 private String trailerComment; 357 358 private final CSVFormat format; 359 360 private final Headers headers; 361 362 private final Lexer lexer; 363 364 private final CSVRecordIterator csvRecordIterator; 365 366 /** A record buffer for getRecord(). Grows as necessary and is reused. */ 367 private final List<String> recordList = new ArrayList<>(); 368 369 /** 370 * The next record number to assign. 371 */ 372 private long recordNumber; 373 374 /** 375 * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination 376 * with {@link #recordNumber}. 377 */ 378 private final long characterOffset; 379 380 private final Token reusableToken = new Token(); 381 382 /** 383 * Constructs a new instance using the given {@link CSVFormat} 384 * 385 * <p> 386 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 387 * unless you close the {@code reader}. 388 * </p> 389 * 390 * @param reader 391 * a Reader containing CSV-formatted input. Must not be null. 392 * @param format 393 * the CSVFormat used for CSV parsing. Must not be null. 394 * @throws IllegalArgumentException 395 * If the parameters of the format are inconsistent or if either reader or format are null. 396 * @throws IOException 397 * If there is a problem reading the header or skipping the first record 398 */ 399 public CSVParser(final Reader reader, final CSVFormat format) throws IOException { 400 this(reader, format, 0, 1); 401 } 402 403 /** 404 * Constructs a new instance using the given {@link CSVFormat} 405 * 406 * <p> 407 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 408 * unless you close the {@code reader}. 409 * </p> 410 * 411 * @param reader 412 * a Reader containing CSV-formatted input. Must not be null. 413 * @param format 414 * the CSVFormat used for CSV parsing. Must not be null. 415 * @param characterOffset 416 * Lexer offset when the parser does not start parsing at the beginning of the source. 417 * @param recordNumber 418 * The next record number to assign 419 * @throws IllegalArgumentException 420 * If the parameters of the format are inconsistent or if either the reader or format is null. 421 * @throws IOException 422 * If there is a problem reading the header or skipping the first record 423 * @since 1.1 424 */ 425 @SuppressWarnings("resource") 426 public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) 427 throws IOException { 428 Objects.requireNonNull(reader, "reader"); 429 Objects.requireNonNull(format, "format"); 430 this.format = format.copy(); 431 this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); 432 this.csvRecordIterator = new CSVRecordIterator(); 433 this.headers = createHeaders(); 434 this.characterOffset = characterOffset; 435 this.recordNumber = recordNumber - 1; 436 } 437 438 private void addRecordValue(final boolean lastRecord) { 439 final String input = format.trim(reusableToken.content.toString()); 440 if (lastRecord && input.isEmpty() && format.getTrailingDelimiter()) { 441 return; 442 } 443 recordList.add(handleNull(input)); 444 } 445 446 /** 447 * Closes resources. 448 * 449 * @throws IOException 450 * If an I/O error occurs 451 */ 452 @Override 453 public void close() throws IOException { 454 lexer.close(); 455 } 456 457 private Map<String, Integer> createEmptyHeaderMap() { 458 return format.getIgnoreHeaderCase() ? 459 new TreeMap<>(String.CASE_INSENSITIVE_ORDER) : 460 new LinkedHashMap<>(); 461 } 462 463 /** 464 * Creates the name to index mapping if the format defines a header. 465 * 466 * @return null if the format has no header. 467 * @throws IOException if there is a problem reading the header or skipping the first record 468 */ 469 private Headers createHeaders() throws IOException { 470 Map<String, Integer> hdrMap = null; 471 List<String> headerNames = null; 472 final String[] formatHeader = format.getHeader(); 473 if (formatHeader != null) { 474 hdrMap = createEmptyHeaderMap(); 475 String[] headerRecord = null; 476 if (formatHeader.length == 0) { 477 // read the header from the first line of the file 478 final CSVRecord nextRecord = nextRecord(); 479 if (nextRecord != null) { 480 headerRecord = nextRecord.values(); 481 headerComment = nextRecord.getComment(); 482 } 483 } else { 484 if (format.getSkipHeaderRecord()) { 485 final CSVRecord nextRecord = nextRecord(); 486 if (nextRecord != null) { 487 headerComment = nextRecord.getComment(); 488 } 489 } 490 headerRecord = formatHeader; 491 } 492 493 // build the name to index mappings 494 if (headerRecord != null) { 495 // Track an occurrence of a null, empty or blank header. 496 boolean observedMissing = false; 497 for (int i = 0; i < headerRecord.length; i++) { 498 final String header = headerRecord[i]; 499 final boolean blankHeader = CSVFormat.isBlank(header); 500 if (blankHeader && !format.getAllowMissingColumnNames()) { 501 throw new IllegalArgumentException( 502 "A header name is missing in " + Arrays.toString(headerRecord)); 503 } 504 505 final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header); 506 final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode(); 507 final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL; 508 final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY; 509 510 if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) { 511 throw new IllegalArgumentException( 512 String.format( 513 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", 514 header, Arrays.toString(headerRecord))); 515 } 516 observedMissing |= blankHeader; 517 if (header != null) { 518 hdrMap.put(header, Integer.valueOf(i)); 519 if (headerNames == null) { 520 headerNames = new ArrayList<>(headerRecord.length); 521 } 522 headerNames.add(header); 523 } 524 } 525 } 526 } 527 // Make header names Collection immutable 528 return new Headers(hdrMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames)); 529 } 530 531 /** 532 * Gets the current line number in the input stream. 533 * 534 * <p> 535 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to 536 * the record number. 537 * </p> 538 * 539 * @return current line number 540 */ 541 public long getCurrentLineNumber() { 542 return lexer.getCurrentLineNumber(); 543 } 544 545 /** 546 * Gets the first end-of-line string encountered. 547 * 548 * @return the first end-of-line string 549 * @since 1.5 550 */ 551 public String getFirstEndOfLine() { 552 return lexer.getFirstEol(); 553 } 554 555 /** 556 * Gets the header comment, if any. 557 * The header comment appears before the header record. 558 * 559 * @return the header comment for this stream, or null if no comment is available. 560 * @since 1.10.0 561 */ 562 public String getHeaderComment() { 563 return headerComment; 564 } 565 566 /** 567 * Gets a copy of the header map as defined in the CSVFormat's header. 568 * <p> 569 * The map keys are column names. The map values are 0-based indices. 570 * </p> 571 * <p> 572 * Note: The map can only provide a one-to-one mapping when the format did not 573 * contain null or duplicate column names. 574 * </p> 575 * 576 * @return a copy of the header map. 577 */ 578 public Map<String, Integer> getHeaderMap() { 579 if (headers.headerMap == null) { 580 return null; 581 } 582 final Map<String, Integer> map = createEmptyHeaderMap(); 583 map.putAll(headers.headerMap); 584 return map; 585 } 586 587 /** 588 * Gets the underlying header map. 589 * 590 * @return the underlying header map. 591 */ 592 Map<String, Integer> getHeaderMapRaw() { 593 return headers.headerMap; 594 } 595 596 /** 597 * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header. 598 * <p> 599 * Note: The list provides strings that can be used as keys in the header map. 600 * The list will not contain null column names if they were present in the input 601 * format. 602 * </p> 603 * 604 * @return read-only list of header names that iterates in column order. 605 * @see #getHeaderMap() 606 * @since 1.7 607 */ 608 public List<String> getHeaderNames() { 609 return Collections.unmodifiableList(headers.headerNames); 610 } 611 612 /** 613 * Gets the current record number in the input stream. 614 * 615 * <p> 616 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to 617 * the line number. 618 * </p> 619 * 620 * @return current record number 621 */ 622 public long getRecordNumber() { 623 return recordNumber; 624 } 625 626 /** 627 * Parses the CSV input according to the given format and returns the content as a list of 628 * {@link CSVRecord CSVRecords}. 629 * 630 * <p> 631 * The returned content starts at the current parse-position in the stream. 632 * </p> 633 * 634 * @return list of {@link CSVRecord CSVRecords}, may be empty 635 * @throws UncheckedIOException 636 * on parse error or input read-failure 637 */ 638 public List<CSVRecord> getRecords() { 639 return stream().collect(Collectors.toList()); 640 } 641 642 /** 643 * Gets the trailer comment, if any. 644 * Trailer comments are located between the last record and EOF 645 * 646 * @return the trailer comment for this stream, or null if no comment is available. 647 * @since 1.10.0 648 */ 649 public String getTrailerComment() { 650 return trailerComment; 651 } 652 653 /** 654 * Handles whether the input is parsed as null 655 * 656 * @param input 657 * the cell data to further processed 658 * @return null if input is parsed as null, or input itself if the input isn't parsed as null 659 */ 660 private String handleNull(final String input) { 661 final boolean isQuoted = reusableToken.isQuoted; 662 final String nullString = format.getNullString(); 663 final boolean strictQuoteMode = isStrictQuoteMode(); 664 if (input.equals(nullString)) { 665 // nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode 666 return strictQuoteMode && isQuoted ? input : null; 667 } 668 // don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode 669 return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input; 670 } 671 672 /** 673 * Checks whether there is a header comment. 674 * The header comment appears before the header record. 675 * Note that if the parser's format has been given an explicit header 676 * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload) 677 * and the header record is not being skipped 678 * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments 679 * will be associated with the first record, not the header. 680 * 681 * @return true if this parser has seen a header comment, false otherwise 682 * @since 1.10.0 683 */ 684 public boolean hasHeaderComment() { 685 return headerComment != null; 686 } 687 688 /** 689 * Checks whether there is a trailer comment. 690 * Trailer comments are located between the last record and EOF. 691 * The trailer comments will only be available after the parser has 692 * finished processing this stream. 693 * 694 * @return true if this parser has seen a trailer comment, false otherwise 695 * @since 1.10.0 696 */ 697 public boolean hasTrailerComment() { 698 return trailerComment != null; 699 } 700 701 /** 702 * Tests whether this parser is closed. 703 * 704 * @return whether this parser is closed. 705 */ 706 public boolean isClosed() { 707 return lexer.isClosed(); 708 } 709 710 /** 711 * Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}. 712 * 713 * @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or 714 * {@link QuoteMode#NON_NUMERIC}. 715 */ 716 private boolean isStrictQuoteMode() { 717 return format.getQuoteMode() == QuoteMode.ALL_NON_NULL || 718 format.getQuoteMode() == QuoteMode.NON_NUMERIC; 719 } 720 721 /** 722 * Returns the record iterator. 723 * 724 * <p> 725 * An {@link IOException} caught during the iteration is re-thrown as an 726 * {@link IllegalStateException}. 727 * </p> 728 * <p> 729 * If the parser is closed, the iterator will not yield any more records. 730 * A call to {@link Iterator#hasNext()} will return {@code false} and 731 * a call to {@link Iterator#next()} will throw a 732 * {@link NoSuchElementException}. 733 * </p> 734 * <p> 735 * If it is necessary to construct an iterator which is usable after the 736 * parser is closed, one option is to extract all records as a list with 737 * {@link #getRecords()}, and return an iterator to that list. 738 * </p> 739 */ 740 @Override 741 public Iterator<CSVRecord> iterator() { 742 return csvRecordIterator; 743 } 744 745 /** 746 * Parses the next record from the current point in the stream. 747 * 748 * @return the record as an array of values, or {@code null} if the end of the stream has been reached 749 * @throws IOException 750 * on parse error or input read-failure 751 */ 752 CSVRecord nextRecord() throws IOException { 753 CSVRecord result = null; 754 recordList.clear(); 755 StringBuilder sb = null; 756 final long startCharPosition = lexer.getCharacterPosition() + characterOffset; 757 do { 758 reusableToken.reset(); 759 lexer.nextToken(reusableToken); 760 switch (reusableToken.type) { 761 case TOKEN: 762 addRecordValue(false); 763 break; 764 case EORECORD: 765 addRecordValue(true); 766 break; 767 case EOF: 768 if (reusableToken.isReady) { 769 addRecordValue(true); 770 } else if (sb != null) { 771 trailerComment = sb.toString(); 772 } 773 break; 774 case INVALID: 775 throw new IOException("(line " + getCurrentLineNumber() + ") invalid parse sequence"); 776 case COMMENT: // Ignored currently 777 if (sb == null) { // first comment for this record 778 sb = new StringBuilder(); 779 } else { 780 sb.append(Constants.LF); 781 } 782 sb.append(reusableToken.content); 783 reusableToken.type = TOKEN; // Read another token 784 break; 785 default: 786 throw new IllegalStateException("Unexpected Token type: " + reusableToken.type); 787 } 788 } while (reusableToken.type == TOKEN); 789 790 if (!recordList.isEmpty()) { 791 recordNumber++; 792 final String comment = sb == null ? null : sb.toString(); 793 result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment, 794 recordNumber, startCharPosition); 795 } 796 return result; 797 } 798 799 /** 800 * Returns a sequential {@code Stream} with this collection as its source. 801 * <p> 802 * If the parser is closed, the stream will not produce any more values. 803 * See the comments in {@link #iterator()}. 804 * </p> 805 * @return a sequential {@code Stream} with this collection as its source. 806 * @since 1.9.0 807 */ 808 public Stream<CSVRecord> stream() { 809 return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator(), Spliterator.ORDERED), false); 810 } 811 812}