decoder.js 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  // Handle for the iconv-lite module; declared here without a value and
  // assigned elsewhere in this file.
  var iconv;

  var inherits = require('util').inherits;
  var stream = require('stream');

  // Matches `charset=<label>` or `encoding=<label>` (case-insensitive, with an
  // optional opening quote) and captures the label made of word chars and dashes.
  var regex = /(?:charset|encoding)\s*=\s*['"]? *([\w\-]+)/i;
  /**
   * Transform stream constructor that remembers the charset it was given.
   *
   * May be called with or without `new`; a plain call returns a fresh instance.
   *
   * @param {string} charset - charset label stored on the instance as `charset`.
   */
  function StreamDecoder(charset) {
    if (!(this instanceof StreamDecoder)) {
      return new StreamDecoder(charset);
    }

    // The Transform constructor takes an options object, not a charset label,
    // so the charset is kept on the instance only and not forwarded.
    stream.Transform.call(this);

    this.charset = charset;
    // Flipped to true by the transform step once the first chunk was inspected.
    this.parsed_chunk = false;
  }

  inherits(StreamDecoder, stream.Transform);
  /**
   * Transform step: passes the chunk through untouched, or feeds it to an
   * iconv decode stream whose output is pushed downstream.
   *
   * When the configured charset is 'utf-8' or 'utf8', the first chunk is
   * searched once for a `charset=` / `encoding=` declaration; if iconv
   * supports the declared charset, it replaces the configured one.
   *
   * NOTE(review): a charset given as 'utf8' (no dash) is not matched by the
   * pass-through check below, so it goes through iconv whenever iconv reports
   * it as supported. Kept as-is; confirm whether that is intended.
   *
   * @param {Buffer} chunk - incoming data.
   * @param {string} encoding - unused.
   * @param {Function} done - called once the chunk has been handled.
   */
  StreamDecoder.prototype._transform = function(chunk, encoding, done) {
    // try to get charset from chunk, but just once
    if (!this.parsed_chunk && (this.charset === 'utf-8' || this.charset === 'utf8')) {
      this.parsed_chunk = true;

      var matches = regex.exec(chunk.toString());
      if (matches) {
        var found = matches[1].toLowerCase().replace('utf8', 'utf-8'); // canonicalize

        // set charset, but only if iconv can handle it
        if (iconv.encodingExists(found)) this.charset = found;
      }
    }

    // if charset is already utf-8 or given encoding isn't supported, just pass through
    if (this.charset === 'utf-8' || !iconv.encodingExists(this.charset)) {
      this.push(chunk);
      return done();
    }

    // initialize stream decoder if not present
    var self = this;
    if (!this.decoder) {
      this.decoder = iconv.decodeStream(this.charset);
      this.decoder.on('data', function(decoded_chunk) {
        self.push(decoded_chunk);
      });
    }

    // Signal completion only after the decoder has taken the chunk, so this
    // stream cannot finish while decoded output is still on its way.
    this.decoder.write(chunk, done);
  };

  /**
   * Flush step: ends the decode stream (if one was created) so that whatever
   * it is still holding gets emitted, and finishes only after its 'end' event.
   *
   * @param {Function} done - called once all decoded output has been pushed.
   */
  StreamDecoder.prototype._flush = function(done) {
    if (!this.decoder) return done();

    this.decoder.once('end', function() {
      done();
    });
    this.decoder.end();
  };
  40. module.exports = function(charset) {
  41. try {
  42. if (!iconv) iconv = require('iconv-lite');
  43. } catch(e) {
  44. /* iconv not found */
  45. }
  46. if (iconv)
  47. return new StreamDecoder(charset);
  48. else
  49. return new stream.PassThrough;
  50. }