001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.File;
020import java.io.FileNotFoundException;
021import java.io.FileOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024import java.io.OutputStreamWriter;
025import java.io.StringWriter;
026import java.io.Writer;
027import java.nio.charset.Charset;
028import java.nio.charset.StandardCharsets;
029import java.util.Locale;
030import java.util.Objects;
031import java.util.regex.Matcher;
032
033import org.apache.commons.io.Charsets;
034import org.apache.commons.io.IOUtils;
035import org.apache.commons.io.build.AbstractStreamBuilder;
036import org.apache.commons.io.input.XmlStreamReader;
037
038/**
039 * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
040 * <p>
041 * To build an instance, see {@link Builder}.
042 * </p>
043 *
044 * @see XmlStreamReader
045 * @since 2.0
046 */
047public class XmlStreamWriter extends Writer {
048
049    /**
050     * Builds a new {@link XmlStreamWriter} instance.
051     * <p>
052     * For example:
053     * </p>
054     * <pre>{@code
055     * WriterOutputStream w = WriterOutputStream.builder()
056     *   .setPath(path)
057     *   .setCharset(StandardCharsets.UTF_8)
058     *   .get();}
059     * </pre>
060     *
061     * @since 2.12.0
062     */
063    public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {
064
065        /**
066         * Constructs a new Builder.
067         */
068        public Builder() {
069            setCharsetDefault(StandardCharsets.UTF_8);
070            setCharset(StandardCharsets.UTF_8);
071        }
072
073        /**
074         * Constructs a new instance.
075         * <p>
076         * This builder use the aspect OutputStream, OpenOption[], and Charset.
077         * </p>
078         * <p>
079         * You must provide an origin that can be converted to an OutputStream by this builder, otherwise, this call will throw an
080         * {@link UnsupportedOperationException}.
081         * </p>
082         *
083         * @return a new instance.
084         * @throws UnsupportedOperationException if the origin cannot provide an OutputStream.
085         * @throws IOException                   if an I/O error occurs.
086         * @see #getOutputStream()
087         */
088        @SuppressWarnings("resource")
089        @Override
090        public XmlStreamWriter get() throws IOException {
091            return new XmlStreamWriter(getOutputStream(), getCharset());
092        }
093
094    }
095
096    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
097
098    /**
099     * Constructs a new {@link Builder}.
100     *
101     * @return a new {@link Builder}.
102     * @since 2.12.0
103     */
104    public static Builder builder() {
105        return new Builder();
106    }
107
108    private final OutputStream out;
109
110    private final Charset defaultCharset;
111
112    private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
113
114    private Writer writer;
115
116    private Charset charset;
117
118    /**
119     * Constructs a new XML stream writer for the specified file
120     * with a default encoding of UTF-8.
121     *
122     * @param file The file to write to
123     * @throws FileNotFoundException if there is an error creating or
124     * opening the file
125     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
126     */
127    @Deprecated
128    public XmlStreamWriter(final File file) throws FileNotFoundException {
129        this(file, null);
130    }
131
132    /**
133     * Constructs a new XML stream writer for the specified file
134     * with the specified default encoding.
135     *
136     * @param file The file to write to
137     * @param defaultEncoding The default encoding if not encoding could be detected
138     * @throws FileNotFoundException if there is an error creating or
139     * opening the file
140     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
141     */
142    @Deprecated
143    @SuppressWarnings("resource")
144    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
145        this(new FileOutputStream(file), defaultEncoding);
146    }
147
148    /**
149     * Constructs a new XML stream writer for the specified output stream
150     * with a default encoding of UTF-8.
151     *
152     * @param out The output stream
153     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
154     */
155    @Deprecated
156    public XmlStreamWriter(final OutputStream out) {
157        this(out, StandardCharsets.UTF_8);
158    }
159
160    /**
161     * Constructs a new XML stream writer for the specified output stream
162     * with the specified default encoding.
163     *
164     * @param out The output stream
165     * @param defaultEncoding The default encoding if not encoding could be detected
166     */
167    private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
168        this.out = out;
169        this.defaultCharset = Objects.requireNonNull(defaultEncoding);
170    }
171
172    /**
173     * Constructs a new XML stream writer for the specified output stream
174     * with the specified default encoding.
175     *
176     * @param out The output stream
177     * @param defaultEncoding The default encoding if not encoding could be detected
178     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
179     */
180    @Deprecated
181    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
182        this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
183    }
184
185    /**
186     * Closes the underlying writer.
187     *
188     * @throws IOException if an error occurs closing the underlying writer
189     */
190    @Override
191    public void close() throws IOException {
192        if (writer == null) {
193            charset = defaultCharset;
194            writer = new OutputStreamWriter(out, charset);
195            writer.write(prologWriter.toString());
196        }
197        writer.close();
198    }
199
200    /**
201     * Detects the encoding.
202     *
203     * @param cbuf the buffer to write the characters from
204     * @param off The start offset
205     * @param len The number of characters to write
206     * @throws IOException if an error occurs detecting the encoding
207     */
208    private void detectEncoding(final char[] cbuf, final int off, final int len)
209            throws IOException {
210        int size = len;
211        final StringBuffer xmlProlog = prologWriter.getBuffer();
212        if (xmlProlog.length() + len > BUFFER_SIZE) {
213            size = BUFFER_SIZE - xmlProlog.length();
214        }
215        prologWriter.write(cbuf, off, size);
216
217        // try to determine encoding
218        if (xmlProlog.length() >= 5) {
219            if (xmlProlog.substring(0, 5).equals("<?xml")) {
220                // try to extract encoding from XML prolog
221                final int xmlPrologEnd = xmlProlog.indexOf("?>");
222                if (xmlPrologEnd > 0) {
223                    // ok, full XML prolog written: let's extract encoding
224                    final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
225                            xmlPrologEnd));
226                    if (m.find()) {
227                        final String encName = m.group(1).toUpperCase(Locale.ROOT);
228                        charset = Charset.forName(encName.substring(1, encName.length() - 1));
229                    } else {
230                        // no encoding found in XML prolog: using default
231                        // encoding
232                        charset = defaultCharset;
233                    }
234                } else if (xmlProlog.length() >= BUFFER_SIZE) {
235                    // no encoding found in first characters: using default
236                    // encoding
237                    charset = defaultCharset;
238                }
239            } else {
240                // no XML prolog: using default encoding
241                charset = defaultCharset;
242            }
243            if (charset != null) {
244                // encoding has been chosen: let's do it
245                prologWriter = null;
246                writer = new OutputStreamWriter(out, charset);
247                writer.write(xmlProlog.toString());
248                if (len > size) {
249                    writer.write(cbuf, off + size, len - size);
250                }
251            }
252        }
253    }
254
255    /**
256     * Flushes the underlying writer.
257     *
258     * @throws IOException if an error occurs flushing the underlying writer
259     */
260    @Override
261    public void flush() throws IOException {
262        if (writer != null) {
263            writer.flush();
264        }
265    }
266
267    /**
268     * Returns the default encoding.
269     *
270     * @return the default encoding
271     */
272    public String getDefaultEncoding() {
273        return defaultCharset.name();
274    }
275
276    /**
277     * Returns the detected encoding.
278     *
279     * @return the detected encoding
280     */
281    public String getEncoding() {
282        return charset.name();
283    }
284
285    /**
286     * Writes the characters to the underlying writer, detecting encoding.
287     *
288     * @param cbuf the buffer to write the characters from
289     * @param off The start offset
290     * @param len The number of characters to write
291     * @throws IOException if an error occurs detecting the encoding
292     */
293    @Override
294    public void write(final char[] cbuf, final int off, final int len) throws IOException {
295        if (prologWriter != null) {
296            detectEncoding(cbuf, off, len);
297        } else {
298            writer.write(cbuf, off, len);
299        }
300    }
301}