TextBuffer.h
1 /*
2  * Copyright (C) 2004-2013 Marc Boris Duerner
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * As a special exception, you may use this file as part of a free
10  * software library without restriction. Specifically, if other files
11  * instantiate templates or use macros or inline functions from this
12  * file, or you compile this file and link it with other files to
13  * produce an executable, this file does not by itself cause the
14  * resulting executable to be covered by the GNU General Public
15  * License. This exception does not however invalidate any other
16  * reasons why the executable file might be covered by the GNU Library
17  * General Public License.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 
29 #ifndef Pt_TextBuffer_h
30 #define Pt_TextBuffer_h
31 
32 #include <Pt/Api.h>
33 #include <Pt/String.h>
34 #include <Pt/TextCodec.h>
35 #include <Pt/StreamBuffer.h>
36 #include <Pt/ConversionError.h>
37 
38 namespace Pt {
39 
50 template <typename CharT, typename ByteT>
51 class BasicTextBuffer : public BasicStreamBuffer<CharT>
52 {
53  public:
55  typedef ByteT extern_type;
56 
58  typedef CharT intern_type;
59 
61  typedef CharT char_type;
62 
64  typedef typename std::char_traits<CharT> traits_type;
65 
67  typedef typename traits_type::int_type int_type;
68 
70  typedef typename traits_type::pos_type pos_type;
71 
73  typedef typename traits_type::off_type off_type;
74 
77 
78  typedef typename CodecType::result CodecResult;
79  typedef MBState state_type;
80 
81  public:
90  explicit BasicTextBuffer(std::basic_ios<extern_type>& target, CodecType* codec = 0)
91  : _ebufsize(0)
92  , _codec(codec)
93  , _codecRefs( codec ? codec->refs() : 0 )
94  , _target(&target)
95  {
96  this->setg(0, 0, 0);
97  this->setp(0, 0);
98  }
99 
109  : _ebufsize(0)
110  , _codec(codec)
111  , _codecRefs( codec ? codec->refs() : 0 )
112  , _target(0)
113  {
114  this->setg(0, 0, 0);
115  this->setp(0, 0);
116  }
117 
121  {
122  // if _codecRefs is greater than 0, the codec might have been
123  // destroyed before the text buffer, therefore we work with a
124  // separate refcount to mark owned codecs
125 
126  if(_codecRefs == 0)
127  delete _codec;
128  }
129 
133  { return _codec; }
134 
141  {
142  _state = state_type();
143 
144  if(_codecRefs == 0)
145  delete _codec;
146 
147  _codec = codec;
148  _codecRefs = codec ? codec->refs() : 0;
149  }
150 
153  void attach(std::basic_ios<extern_type>& target)
154  {
155  _target = &target;
156  }
157 
160  void detach()
161  {
162  _target = 0;
163  }
164 
170  void reset()
171  {
172  discard();
173  detach();
174  }
175 
181  void reset(std::basic_ios<extern_type>& target)
182  {
183  discard();
184  attach(target);
185  }
186 
189  void discard()
190  {
191  _ebufsize = 0;
192  this->setp(0, 0);
193  this->setg(0, 0, 0);
194 
195  _state = state_type();
196  }
197 
202  std::streamsize import(std::streamsize size = 0)
203  {
204  if( this->pptr() )
205  {
206  discard();
207  }
208 
209  if( ! this->gptr() )
210  {
211  this->setg(_ibuf, _ibuf, _ibuf);
212  }
213 
214  std::streamsize n = 0;
215  std::basic_streambuf<extern_type>* rdbuf = _target ? _target->rdbuf()
216  : 0;
217 
218  // special case: read available input
219  if(size == 0 && rdbuf)
220  size = rdbuf->in_avail();
221 
222  // not more than input buffer size
223  const std::streamsize ebufavail = _ebufmax - _ebufsize;
224  size = ebufavail < size ? ebufavail : size;
225 
226  if(size > 0 && rdbuf)
227  {
228  n = rdbuf->sgetn( _ebuf + _ebufsize, size );
229  _ebufsize += static_cast<int>(n);
230  if(n <= 0)
231  {
232  // rdbuf == 0 means "at end of input"
233  rdbuf = 0;
234  }
235  }
236 
237  if( this->gptr() - this->eback() > _pbmax)
238  {
239  std::streamsize movelen = this->egptr() - this->gptr() + _pbmax;
240  std::char_traits<char_type>::move( _ibuf,
241  this->gptr() - _pbmax,
242  static_cast<std::size_t>(movelen));
243  this->setg(_ibuf, _ibuf + _pbmax, _ibuf + movelen);
244  }
245 
246  typename CodecType::result r = decode();
247 
248  // fail if partial conversion at the end of input
249  // rdbuf == 0 means "at end of input"
250  if( rdbuf == 0 && _ebufsize == 0 && r == CodecType::partial)
251  throw ConversionError("character encoding");
252 
253  return n;
254  }
255 
260  std::streamsize import(const extern_type* buf, std::streamsize size)
261  {
262  if( this->pptr() )
263  {
264  discard();
265  }
266 
267  if( ! this->gptr() )
268  {
269  this->setg(_ibuf, _ibuf, _ibuf);
270  }
271 
272  const std::streamsize bufavail = _ebufmax - _ebufsize;
273  size = bufavail < size ? bufavail : size;
274  if(size > 0)
275  {
276  std::char_traits<extern_type>::copy( _ebuf + _ebufsize, buf, static_cast<std::size_t>(size) );
277  _ebufsize += static_cast<int>(size);
278  }
279 
280  if( this->gptr() - this->eback() > _pbmax)
281  {
282  std::streamsize movelen = this->egptr() - this->gptr() + _pbmax;
283  std::char_traits<char_type>::move( _ibuf,
284  this->gptr() - _pbmax,
285  static_cast<std::size_t>(movelen));
286  this->setg(_ibuf, _ibuf + _pbmax, _ibuf + movelen);
287  }
288 
289  decode();
290 
291  return size;
292  }
293 
294  protected:
295  // inheritdoc
296  virtual int sync()
297  {
298  if( ! this->pptr() )
299  return 0;
300 
301  // write the whole buffer to the underlying stream.
302  while( this->pptr() > this->pbase() )
303  {
304  if( this->overflow( traits_type::eof() ) == traits_type::eof() )
305  return -1;
306  }
307 
308  // unshift the character sequence
309  if( _target && _target->rdbuf() && _codec )
310  {
311  typename CodecType::result res = CodecType::error;
312 
313  do
314  {
315  extern_type* next = 0;
316  res = _codec->unshift(_state, _ebuf, _ebuf + _ebufmax, next);
317  if(res == CodecType::noconv)
318  break;
319 
320  if(res == CodecType::error)
321  throw ConversionError("character encoding");
322 
323  std::streamsize n = static_cast<std::streamsize>(next - _ebuf);
324  if( n != _target->rdbuf()->sputn(_ebuf, n) )
325  return -1;
326  }
327  while(res == CodecType::partial);
328  }
329 
330  discard();
331 
332  return 0;
333  }
334 
335  // inheritdoc
336  virtual std::streamsize showmanyc()
337  {
338  //return _target && _target->rdbuf() ? _target->rdbuf()->in_avail()
339  // : -1;
340 
341  // Return 0, because we can not predict how many characters
342  // can be decoded from the available external byte sequence.
343  // Also, if we returned a number > 0, the next call to underflow()
344  // must exactly read this number of bytes without blocking.
345 
346  return _target && _target->rdbuf() ? 0 : -1;
347  }
348 
349  // inheritdoc
350  virtual int_type overflow( int_type ch = traits_type::eof() )
351  {
352  if( ! _target || ! _target->rdbuf() || this->gptr() )
353  return traits_type::eof();
354 
355  if( ! this->pptr() )
356  {
357  this->setp( _ibuf, _ibuf + _ibufmax );
358  }
359 
360  while( this->pptr() > this->pbase() )
361  {
362  const char_type* fromBegin = _ibuf;
363  const char_type* fromEnd = this->pptr();
364  const char_type* fromNext = fromBegin;
365  extern_type* toBegin = _ebuf + _ebufsize;
366  extern_type* toEnd = _ebuf + _ebufmax;
367  extern_type* toNext = toBegin;
368 
369  typename CodecType::result res = CodecType::noconv;
370  if(_codec)
371  res = _codec->out(_state, fromBegin, fromEnd, fromNext, toBegin, toEnd, toNext);
372 
373  if(res == CodecType::noconv)
374  {
375  std::size_t fromSize = fromEnd - fromBegin;
376  std::size_t toSize = toEnd - toBegin;
377  std::size_t size = toSize < fromSize ? toSize : fromSize;
378 
379  this->copyChars( toBegin, fromBegin, size );
380 
381  fromNext += size;
382  toNext += size;
383  }
384 
385  _ebufsize += static_cast<int>(toNext - toBegin);
386  std::size_t leftover = fromEnd - fromNext;
387  if(leftover && fromNext > fromBegin)
388  {
389  std::char_traits<char_type>::move(_ibuf, fromNext, leftover);
390  }
391 
392  this->setp( _ibuf, _ibuf + _ibufmax );
393  this->pbump( static_cast<int>(leftover) );
394 
395  if(res == CodecType::error)
396  throw ConversionError("character encoding");
397 
398  if(res == CodecType::partial && _ebufsize == 0)
399  break;
400 
401  _ebufsize -= static_cast<int>(_target->rdbuf()->sputn(_ebuf, _ebufsize));
402  if(_ebufsize)
403  return traits_type::eof();
404  }
405 
406  if( ! traits_type::eq_int_type(ch, traits_type::eof()) )
407  {
408  *( this->pptr() ) = traits_type::to_char_type(ch);
409  this->pbump(1);
410  }
411 
412  return traits_type::not_eof(ch);
413  }
414 
415  // inheritdoc
416  virtual int_type underflow()
417  {
418  if( this->gptr() < this->egptr() )
419  return traits_type::to_int_type( *this->gptr() );
420 
421  import(_ebufmax);
422 
423  return this->gptr() < this->egptr() ? traits_type::to_int_type( *this->gptr() )
424  : traits_type::eof();
425  }
426 
427  private:
429  CodecResult decode()
430  {
431  const extern_type* fromBegin = _ebuf;
432  const extern_type* fromEnd = _ebuf + _ebufsize;
433  const extern_type* fromNext = fromBegin;
434  char_type* toBegin = this->egptr();
435  char_type* toEnd = _ibuf + _ibufmax;
436  char_type* toNext = toBegin;
437 
438  typename CodecType::result r = CodecType::noconv;
439  if(_codec)
440  r = _codec->in(_state, fromBegin, fromEnd, fromNext, toBegin, toEnd, toNext);
441 
442  if(r == CodecType::noconv)
443  {
444  // copy characters and advance fromNext and toNext
445  int n =_ebufsize > _ibufmax ? _ibufmax : _ebufsize ;
446  this->copyChars(toBegin, fromBegin, n);
447  fromNext += n;
448  toNext += n;
449  }
450 
451  std::streamsize consumed = fromNext - fromBegin;
452  if(consumed > 0)
453  {
454  _ebufsize -= static_cast<int>(consumed);
455  std::char_traits<extern_type>::move( _ebuf, _ebuf + consumed, _ebufsize);
456  }
457 
458  std::streamsize generated = toNext - toBegin;
459  if(generated)
460  {
461  this->setg(this->eback(), // start of read buffer
462  this->gptr(), // gptr position
463  this->egptr() + generated ); // end of read buffer
464  }
465 
466  if(r == CodecType::error)
467  throw ConversionError("character encoding");
468 
469  return r;
470  }
471 
/** @brief Copies characters between sequences of the same type.

    @param s1 Destination, must have room for @a n characters.
    @param s2 Source of the characters.
    @param n  Number of characters to copy.
*/
template <typename T>
void copyChars(T* s1, const T* s2, std::size_t n)
{
    typedef std::char_traits<T> Traits;
    Traits::copy(s1, s2, n);
}
478 
/** @brief Copies characters between sequences of different types.

    Each element is converted by plain assignment.

    @param s1 Destination, must have room for @a n characters.
    @param s2 Source of the characters.
    @param n  Number of characters to copy.
*/
template <typename A, typename B>
void copyChars(A* s1, const B* s2, std::size_t n)
{
    for(std::size_t i = 0; i < n; ++i)
    {
        s1[i] = s2[i];
    }
}
490 
492  template <typename A>
493  void copyChars(A* s1, const Char* s2, std::size_t n)
494  {
495  while(n-- > 0)
496  {
497  *s1 = static_cast<A>(s2->value());
498  ++s1;
499  ++s2;
500  }
501  }
502 
503  private:
504  static const int _pbmax = 4;
505 
506  static const int _ebufmax = 256;
507  extern_type _ebuf[_ebufmax];
508  int _ebufsize;
509 
510  static const int _ibufmax = 256;
511  intern_type _ibuf[_ibufmax];
512 
513  state_type _state;
514  CodecType* _codec;
515  std::size_t _codecRefs;
516  std::basic_ios<extern_type>* _target;
517 };
518 
531 typedef BasicTextBuffer<Pt::Char, char> TextBuffer;
532 
534 //*/
535 //class PT_API TextBuffer : public BasicTextBuffer<Pt::Char, char>
536 //{
537 // public:
538 // typedef TextCodec<Pt::Char, char> Codec;
539 //
540 // public:
541 // /** @brief Constructs a new TextBuffer
542 // */
543 // explicit TextBuffer(std::ios& buffer, Codec* codec = 0);
544 //
545 // explicit TextBuffer(Codec* codec = 0);
546 //};
547 
548 } // namespace Pt
549 
550 #endif // Pt_TextBuffer_h
Converts character sequences with different encodings.
Definition: TextBuffer.h:51
TextCodec< char_type, extern_type > CodecType
Codec type.
Definition: TextBuffer.h:76
void reset(std::basic_ios< extern_type > &target)
Resets the buffer and target.
Definition: TextBuffer.h:181
void discard()
Discards the buffer.
Definition: TextBuffer.h:189
result unshift(MBState &state, ByteT *to, ByteT *to_end, ByteT *&to_next) const
Unshifts a character sequence.
Indicates a failed conversion.
Definition: ConversionError.h:45
BasicTextBuffer(CodecType *codec=0)
Construct with codec.
Definition: TextBuffer.h:108
traits_type::int_type int_type
Integer type.
Definition: TextBuffer.h:67
Buffer for input and output streams.
Definition: StreamBuffer.h:51
CodecType * codec()
Returns the used codec or a nullptr.
Definition: TextBuffer.h:132
Converts between character encodings.
Definition: TextCodec.h:38
ByteT extern_type
External character type.
Definition: TextBuffer.h:55
traits_type::off_type off_type
Stream offset type.
Definition: TextBuffer.h:73
result out(MBState &state, const CharT *fbeg, const CharT *fend, const CharT *&fnext, ByteT *tbeg, ByteT *tend, ByteT *&tnext) const
Encodes a character sequence.
CharT intern_type
Internal character type.
Definition: TextBuffer.h:58
traits_type::pos_type pos_type
Stream position type.
Definition: TextBuffer.h:70
void reset()
Resets the buffer and target.
Definition: TextBuffer.h:170
CharT char_type
Internal character type.
Definition: TextBuffer.h:61
result in(MBState &s, const ByteT *fbeg, const ByteT *fend, const ByteT *&fnext, CharT *tbeg, CharT *tend, CharT *&tnext) const
Decodes a character sequence.
void setCodec(CodecType *codec)
Sets the text codec.
Definition: TextBuffer.h:140
void detach()
Detach from external target.
Definition: TextBuffer.h:160
BasicTextBuffer(std::basic_ios< extern_type > &target, CodecType *codec=0)
Construct with external device and codec.
Definition: TextBuffer.h:90
std::char_traits< CharT > traits_type
Internal character traits.
Definition: TextBuffer.h:64
~BasicTextBuffer()
Destructor.
Definition: TextBuffer.h:120
void attach(std::basic_ios< extern_type > &target)
Attach to external target.
Definition: TextBuffer.h:153