001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.output; 018 019import java.io.IOException; 020import java.io.OutputStream; 021import java.io.Writer; 022import java.nio.ByteBuffer; 023import java.nio.CharBuffer; 024import java.nio.charset.Charset; 025import java.nio.charset.CharsetDecoder; 026import java.nio.charset.CoderResult; 027import java.nio.charset.CodingErrorAction; 028import java.nio.charset.StandardCharsets; 029 030import org.apache.commons.io.Charsets; 031import org.apache.commons.io.IOUtils; 032import org.apache.commons.io.build.AbstractStreamBuilder; 033import org.apache.commons.io.charset.CharsetDecoders; 034 035/** 036 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to 037 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled 038 * correctly. 039 * <p> 040 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in 041 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()} 042 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can 043 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer}, 044 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}. 045 * </p> 046 * <p> 047 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter}; in the following example, writing to {@code out2} 048 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding): 049 * </p> 050 * <p> 051 * To build an instance, see {@link Builder}. 052 * </p> 053 * <pre> 054 * OutputStream out = ... 055 * Charset cs = ... 056 * OutputStreamWriter writer = new OutputStreamWriter(out, cs); 057 * WriterOutputStream out2 = WriterOutputStream.builder() 058 * .setWriter(writer) 059 * .setCharset(cs) 060 * .get(); 061 * </pre> 062 * <p> 063 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader}, except that the control flow is reversed: both classes 064 * transform a byte stream into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream, while 065 * {@link WriterOutputStream} pushes it to the underlying stream. 066 * </p> 067 * <p> 068 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in 069 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is 070 * known to represent character data that must be decoded for further use. 071 * </p> 072 * <p> 073 * Instances of {@link WriterOutputStream} are not thread safe. 074 * </p> 075 * 076 * @see org.apache.commons.io.input.ReaderInputStream 077 * @since 2.0 078 */ 079public class WriterOutputStream extends OutputStream { 080 081 /** 082 * Builds a new {@link WriterOutputStream} instance. 083 * <p> 084 * For example: 085 * </p> 086 * <pre>{@code 087 * WriterOutputStream s = WriterOutputStream.builder() 088 * .setPath(path) 089 * .setBufferSize(8192) 090 * .setCharset(StandardCharsets.UTF_8) 091 * .setWriteImmediately(false) 092 * .get();} 093 * </pre> 094 * 095 * @since 2.12.0 096 */ 097 public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> { 098 099 private CharsetDecoder charsetDecoder; 100 private boolean writeImmediately; 101 102 /** 103 * Constructs a new Builder. 104 */ 105 public Builder() { 106 this.charsetDecoder = getCharset().newDecoder(); 107 } 108 109 /** 110 * Constructs a new instance. 111 * <p> 112 * This builder use the aspect Writer, OpenOption[], Charset, CharsetDecoder, buffer size and writeImmediately. 113 * </p> 114 * <p> 115 * You must provide an origin that can be converted to a Writer by this builder, otherwise, this call will throw an 116 * {@link UnsupportedOperationException}. 117 * </p> 118 * 119 * @return a new instance. 120 * @throws UnsupportedOperationException if the origin cannot provide a Writer. 121 * @see #getWriter() 122 */ 123 @SuppressWarnings("resource") 124 @Override 125 public WriterOutputStream get() throws IOException { 126 return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately); 127 } 128 129 @Override 130 public Builder setCharset(final Charset charset) { 131 super.setCharset(charset); 132 this.charsetDecoder = getCharset().newDecoder(); 133 return this; 134 } 135 136 @Override 137 public Builder setCharset(final String charset) { 138 super.setCharset(charset); 139 this.charsetDecoder = getCharset().newDecoder(); 140 return this; 141 } 142 143 /** 144 * Sets the charset decoder. 145 * 146 * @param charsetDecoder the charset decoder. 147 * @return this 148 */ 149 public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) { 150 this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder(); 151 super.setCharset(this.charsetDecoder.charset()); 152 return this; 153 } 154 155 /** 156 * Sets whether the output buffer will be flushed after each write operation ({@code true}), i.e. all available data will be written to the underlying 157 * {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} 158 * is called. 159 * 160 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the 161 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when 162 * {@link #flush()} or {@link #close()} is called. 163 * @return this 164 */ 165 public Builder setWriteImmediately(final boolean writeImmediately) { 166 this.writeImmediately = writeImmediately; 167 return this; 168 } 169 170 } 171 172 private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE; 173 174 /** 175 * Constructs a new {@link Builder}. 176 * 177 * @return a new {@link Builder}. 178 * @since 2.12.0 179 */ 180 public static Builder builder() { 181 return new Builder(); 182 } 183 184 /** 185 * Checks if the JDK in use properly supports the given charset. 186 * 187 * @param charset the charset to check the support for 188 */ 189 private static void checkIbmJdkWithBrokenUTF16(final Charset charset) { 190 if (!StandardCharsets.UTF_16.name().equals(charset.name())) { 191 return; 192 } 193 final String TEST_STRING_2 = "v\u00e9s"; 194 final byte[] bytes = TEST_STRING_2.getBytes(charset); 195 196 final CharsetDecoder charsetDecoder2 = charset.newDecoder(); 197 final ByteBuffer bb2 = ByteBuffer.allocate(16); 198 final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length()); 199 final int len = bytes.length; 200 for (int i = 0; i < len; i++) { 201 bb2.put(bytes[i]); 202 bb2.flip(); 203 try { 204 charsetDecoder2.decode(bb2, cb2, i == len - 1); 205 } catch (final IllegalArgumentException e) { 206 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. " 207 + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream"); 208 } 209 bb2.compact(); 210 } 211 cb2.rewind(); 212 if (!TEST_STRING_2.equals(cb2.toString())) { 213 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. " 214 + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream"); 215 } 216 217 } 218 219 private final Writer writer; 220 private final CharsetDecoder decoder; 221 222 private final boolean writeImmediately; 223 224 /** 225 * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder. 226 */ 227 private final ByteBuffer decoderIn = ByteBuffer.allocate(128); 228 229 /** 230 * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer. 231 */ 232 private final CharBuffer decoderOut; 233 234 /** 235 * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE} 236 * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called. 237 * 238 * @param writer the target {@link Writer} 239 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 240 */ 241 @Deprecated 242 public WriterOutputStream(final Writer writer) { 243 this(writer, Charset.defaultCharset(), BUFFER_SIZE, false); 244 } 245 246 /** 247 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed 248 * when it overflows or when {@link #flush()} or {@link #close()} is called. 249 * 250 * @param writer the target {@link Writer} 251 * @param charset the charset encoding 252 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 253 */ 254 @Deprecated 255 public WriterOutputStream(final Writer writer, final Charset charset) { 256 this(writer, charset, BUFFER_SIZE, false); 257 } 258 259 /** 260 * Constructs a new {@link WriterOutputStream}. 261 * 262 * @param writer the target {@link Writer} 263 * @param charset the charset encoding 264 * @param bufferSize the size of the output buffer in number of characters 265 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the 266 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when 267 * {@link #flush()} or {@link #close()} is called. 268 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 269 */ 270 @Deprecated 271 public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) { 272 // @formatter:off 273 this(writer, 274 Charsets.toCharset(charset).newDecoder() 275 .onMalformedInput(CodingErrorAction.REPLACE) 276 .onUnmappableCharacter(CodingErrorAction.REPLACE) 277 .replaceWith("?"), 278 bufferSize, 279 writeImmediately); 280 // @formatter:on 281 } 282 283 /** 284 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed 285 * when it overflows or when {@link #flush()} or {@link #close()} is called. 286 * 287 * @param writer the target {@link Writer} 288 * @param decoder the charset decoder 289 * @since 2.1 290 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 291 */ 292 @Deprecated 293 public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) { 294 this(writer, decoder, BUFFER_SIZE, false); 295 } 296 297 /** 298 * Constructs a new {@link WriterOutputStream}. 299 * 300 * @param writer the target {@link Writer} 301 * @param decoder the charset decoder 302 * @param bufferSize the size of the output buffer in number of characters 303 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the 304 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when 305 * {@link #flush()} or {@link #close()} is called. 306 * @since 2.1 307 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 308 */ 309 @Deprecated 310 public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) { 311 checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset()); 312 this.writer = writer; 313 this.decoder = CharsetDecoders.toCharsetDecoder(decoder); 314 this.writeImmediately = writeImmediately; 315 this.decoderOut = CharBuffer.allocate(bufferSize); 316 } 317 318 /** 319 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed 320 * when it overflows or when {@link #flush()} or {@link #close()} is called. 321 * 322 * @param writer the target {@link Writer} 323 * @param charsetName the name of the charset encoding 324 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 325 */ 326 @Deprecated 327 public WriterOutputStream(final Writer writer, final String charsetName) { 328 this(writer, charsetName, BUFFER_SIZE, false); 329 } 330 331 /** 332 * Constructs a new {@link WriterOutputStream}. 333 * 334 * @param writer the target {@link Writer} 335 * @param charsetName the name of the charset encoding 336 * @param bufferSize the size of the output buffer in number of characters 337 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the 338 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when 339 * {@link #flush()} or {@link #close()} is called. 340 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 341 */ 342 @Deprecated 343 public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) { 344 this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately); 345 } 346 347 /** 348 * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that 349 * {@link Writer#close()} will be called. 350 * 351 * @throws IOException if an I/O error occurs. 352 */ 353 @Override 354 public void close() throws IOException { 355 processInput(true); 356 flushOutput(); 357 writer.close(); 358 } 359 360 /** 361 * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that 362 * {@link Writer#flush()} will be called. 363 * 364 * @throws IOException if an I/O error occurs. 365 */ 366 @Override 367 public void flush() throws IOException { 368 flushOutput(); 369 writer.flush(); 370 } 371 372 /** 373 * Flush the output. 374 * 375 * @throws IOException if an I/O error occurs. 376 */ 377 private void flushOutput() throws IOException { 378 if (decoderOut.position() > 0) { 379 writer.write(decoderOut.array(), 0, decoderOut.position()); 380 decoderOut.rewind(); 381 } 382 } 383 384 /** 385 * Decode the contents of the input ByteBuffer into a CharBuffer. 386 * 387 * @param endOfInput indicates end of input 388 * @throws IOException if an I/O error occurs. 389 */ 390 private void processInput(final boolean endOfInput) throws IOException { 391 // Prepare decoderIn for reading 392 decoderIn.flip(); 393 CoderResult coderResult; 394 while (true) { 395 coderResult = decoder.decode(decoderIn, decoderOut, endOfInput); 396 if (coderResult.isOverflow()) { 397 flushOutput(); 398 } else if (coderResult.isUnderflow()) { 399 break; 400 } else { 401 // The decoder is configured to replace malformed input and unmappable characters, 402 // so we should not get here. 403 throw new IOException("Unexpected coder result"); 404 } 405 } 406 // Discard the bytes that have been read 407 decoderIn.compact(); 408 } 409 410 /** 411 * Write bytes from the specified byte array to the stream. 412 * 413 * @param b the byte array containing the bytes to write 414 * @throws IOException if an I/O error occurs. 415 */ 416 @Override 417 public void write(final byte[] b) throws IOException { 418 write(b, 0, b.length); 419 } 420 421 /** 422 * Write bytes from the specified byte array to the stream. 423 * 424 * @param b the byte array containing the bytes to write 425 * @param off the start offset in the byte array 426 * @param len the number of bytes to write 427 * @throws IOException if an I/O error occurs. 428 */ 429 @Override 430 public void write(final byte[] b, int off, int len) throws IOException { 431 while (len > 0) { 432 final int c = Math.min(len, decoderIn.remaining()); 433 decoderIn.put(b, off, c); 434 processInput(false); 435 len -= c; 436 off += c; 437 } 438 if (writeImmediately) { 439 flushOutput(); 440 } 441 } 442 443 /** 444 * Write a single byte to the stream. 445 * 446 * @param b the byte to write 447 * @throws IOException if an I/O error occurs. 448 */ 449 @Override 450 public void write(final int b) throws IOException { 451 write(new byte[] { (byte) b }, 0, 1); 452 } 453}