开发者

How do you follow an HTTP Redirect in Node.js?

I want to open a page up in node and process th开发者_JAVA技巧e contents in my application. Something like this seems to work well:

var opts = {host: host, path:pathname, port: 80};
http.get(opts, function(res) {
  var page = '';
  res.on('data', function (chunk) {
    page += chunk;
  });
  res.on('end', function() {
     // process page
  });

This doesn't work, however, if the page returns an 301/302 redirect. How would I do that in a reusable way in case there are multiple redirects? Is there a wrapper module on top of the http to more easily handle processing http responses from a node application?


If all you want to do is follow redirects but still want to use the built-in HTTP and HTTPS modules, I suggest you use https://github.com/follow-redirects/follow-redirects.

yarn add follow-redirects
npm install follow-redirects

All you need to do is replace:

var http = require('http');

with

var http = require('follow-redirects').http;

... and all your requests will automatically follow redirects.

With TypeScript you can also install the types

npm install @types/follow-redirects

and then use

import { http, https } from 'follow-redirects';

Disclosure: I wrote this module.


Is there a wrapper module on top of the http to more easily handle processing http responses from a node application?

request

Redirection logic in request


Make another request based on response.headers.location:

      const request = function(url) {
        lib.get(url, (response) => {
          var body = [];
          if (response.statusCode == 302) {
            body = [];
            request(response.headers.location);
          } else {
            response.on("data", /*...*/);
            response.on("end", /*...*/);
          };
        } ).on("error", /*...*/);
      };
      request(url);


Update:

Now you can follow all redirects with var request = require('request'); using the followAllRedirects param.

request({
  followAllRedirects: true,
  url: url
}, function (error, response, body) {
  if (!error) {
    console.log(response);
  }
});


Here is my (recursive) approach to download JSON with plain node, no packages required.

import https from "https";

function get(url, resolve, reject) {
  https.get(url, (res) => {
    // if any other status codes are returned, those needed to be added here
    if(res.statusCode === 301 || res.statusCode === 302) {
      return get(res.headers.location, resolve, reject)
    }

    let body = [];

    res.on("data", (chunk) => {
      body.push(chunk);
    });

    res.on("end", () => {
      try {
        // remove JSON.parse(...) for plain data
        resolve(JSON.parse(Buffer.concat(body).toString()));
      } catch (err) {
        reject(err);
      }
    });
  });
}

async function getData(url) {
  return new Promise((resolve, reject) => get(url, resolve, reject));
}

// call
getData("some-url-with-redirect").then((r) => console.log(r));


Here is function I use to fetch the url that have redirect:

const http = require('http');
const url = require('url');

function get({path, host}, callback) {
    http.get({
        path,
        host
    }, function(response) {
        if (response.headers.location) {    
            var loc = response.headers.location;
            if (loc.match(/^http/)) {
                loc = new Url(loc);
                host = loc.host;
                path = loc.path;
            } else {
                path = loc;
            }
            get({host, path}, callback);
        } else {
            callback(response);
        }
    });
}

it work the same as http.get but follow redirect.


In case of PUT or POST Request. if you receive statusCode 405 or method not allowed. Try this implementation with "request" library, and add mentioned properties.
followAllRedirects: true,
followOriginalHttpMethod: true

       const options = {
           headers: {
               Authorization: TOKEN,
               'Content-Type': 'application/json',
               'Accept': 'application/json'
           },
           url: `https://${url}`,
           json: true,
           body: payload,
           followAllRedirects: true,
           followOriginalHttpMethod: true
       }

       console.log('DEBUG: API call', JSON.stringify(options));
       request(options, function (error, response, body) {
       if (!error) {
        console.log(response);
        }
     });
}


If you have https server, change your url to use https:// protocol.

I got into similar issue with this one. My url has http:// protocol and I want to make a POST request, but the server wants to redirect it to https. What happen is that, turns out to be node http behavior sends the redirect request (next) in GET method which is not the case.

What I did is to change my url to https:// protocol and it works.


Possibly a little bit of a necromancing post here, but...

here's a function that follows up to 10 redirects, and detects infinite redirect loops. also parses result into JSON

Note - uses a callback helper (shown at the end of this post)

( TLDR; full working demo in context here or remixed-version here)

function getJSON(url,cb){

    var callback=errBack(cb);
    //var callback=errBack(cb,undefined,false);//replace previous line with this to turn off logging

    if (typeof url!=='string') {
        return callback.error("getJSON:expecting url as string");
    }

    if (typeof cb!=='function') {
        return callback.error("getJSON:expecting cb as function");
    }

    var redirs = [url],
    fetch = function(u){
        callback.info("hitting:"+u);
        https.get(u, function(res){
            var body = [];
            callback.info({statusCode:res.statusCode});
            if ([301,302].indexOf(res.statusCode)>=0) {
                if (redirs.length>10) {
                    return callback.error("excessive 301/302 redirects detected");
                } else {
                    if (redirs.indexOf(res.headers.location)<0) {
                        redirs.push(res.headers.location);
                        return fetch(res.headers.location);
                    } else {
                        return callback.error("301/302 redirect loop detected");
                    }
                }
            } else {
              res.on('data', function(chunk){
                  body.push(chunk);
                  callback.info({onData:{chunkSize:chunk.length,chunks:body.length}});
              });
              res.on('end', function(){
                  try {
                      // convert to a single buffer
                      var json = Buffer.concat(body);
                      console.info({onEnd:{chunks:body.length,bodyLength:body.length}});

                      // parse the buffer as json
                      return callback.result(JSON.parse(json),json);
                  } catch (err) {

                      console.error("exception in getJSON.fetch:",err.message||err);

                      if (json.length>32) {
                        console.error("json==>|"+json.toString('utf-8').substr(0,32)+"|<=== ... (+"+(json.length-32)+" more bytes of json)");
                      } else {
                          console.error("json==>|"+json.toString('utf-8')+"|<=== json");
                      }

                      return callback.error(err,undefined,json);
                  }
              });
            }
        });
    };
    fetch(url);   
}

Note - uses a callback helper (shown below)

you can paste this into the node console and it should run as is.

( or for full working demo in context see here )

var 

fs      = require('fs'),
https   = require('https');

function errBack (cb,THIS,logger) {

   var 
   self,
   EB=function(fn,r,e){
       if (logger===false) {
           fn.log=fn.info=fn.warn=fn.errlog=function(){};       
       } else {
           fn.log        = logger?logger.log   : console.log.bind(console);
           fn.info       = logger?logger.info  : console.info.bind(console);
           fn.warn       = logger?logger.warn  : console.warn.bind(console);
           fn.errlog     = logger?logger.error : console.error.bind(console);
       }
       fn.result=r;
       fn.error=e;
       return (self=fn);
   };


   if (typeof cb==='function') {
       return EB(

            logger===false // optimization when not logging - don't log errors
            ?   function(err){
                   if (err) {
                      cb (err);
                     return true;
                   }
                   return false;
               }

            :  function(err){
                   if (err) {
                      self.errlog(err);
                      cb (err);
                     return true;
                   }
                   return false;
               },

           function () {
               return cb.apply (THIS,Array.prototype.concat.apply([undefined],arguments));
           },
           function (err) {
               return cb.apply (THIS,Array.prototype.concat.apply([typeof err==='string'?new Error(err):err],arguments));
           }
       );
   } else {

       return EB(

           function(err){
               if (err) {
                   if (typeof err ==='object' && err instanceof Error) {
                       throw err;
                   } else {
                       throw new Error(err);
                   }
                   return true;//redundant due to throw, but anyway.
               }
               return false;
           },

           logger===false
              ? self.log //optimization :resolves to noop when logger==false
              : function () {
                   self.info("ignoring returned arguments:",Array.prototype.concat.apply([],arguments));
           },

           function (err) {
               throw typeof err==='string'?new Error(err):err;
           }
       );
   }
}

function getJSON(url,cb){

    var callback=errBack(cb);

    if (typeof url!=='string') {
        return callback.error("getJSON:expecting url as string");
    }

    if (typeof cb!=='function') {
        return callback.error("getJSON:expecting cb as function");
    }

    var redirs = [url],
    fetch = function(u){
        callback.info("hitting:"+u);
        https.get(u, function(res){
            var body = [];
            callback.info({statusCode:res.statusCode});
            if ([301,302].indexOf(res.statusCode)>=0) {
                if (redirs.length>10) {
                    return callback.error("excessive 302 redirects detected");
                } else {
                    if (redirs.indexOf(res.headers.location)<0) {
                        redirs.push(res.headers.location);
                        return fetch(res.headers.location);
                    } else {
                        return callback.error("302 redirect loop detected");
                    }
                }
            } else {
              res.on('data', function(chunk){
                  body.push(chunk);
                  console.info({onData:{chunkSize:chunk.length,chunks:body.length}});
              });
              res.on('end', function(){
                  try {
                      // convert to a single buffer
                      var json = Buffer.concat(body);
                      callback.info({onEnd:{chunks:body.length,bodyLength:body.length}});

                      // parse the buffer as json
                      return callback.result(JSON.parse(json),json);
                  } catch (err) {
                      // read with "bypass refetch" option
                      console.error("exception in getJSON.fetch:",err.message||err);

                      if (json.length>32) {
                        console.error("json==>|"+json.toString('utf-8').substr(0,32)+"|<=== ... (+"+(json.length-32)+" more bytes of json)");
                      } else {
                          console.error("json==>|"+json.toString('utf-8')+"|<=== json");
                      }

                      return callback.error(err,undefined,json);
                  }
              });
            }
        });
    };
    fetch(url);   
}

var TLDs,TLDs_fallback = "com.org.tech.net.biz.info.code.ac.ad.ae.af.ag.ai.al.am.ao.aq.ar.as.at.au.aw.ax.az.ba.bb.bd.be.bf.bg.bh.bi.bj.bm.bn.bo.br.bs.bt.bv.bw.by.bz.ca.cc.cd.cf.cg.ch.ci.ck.cl.cm.cn.co.cr.cu.cv.cw.cx.cy.cz.de.dj.dk.dm.do.dz.ec.ee.eg.er.es.et.eu.fi.fj.fk.fm.fo.fr.ga.gb.gd.ge.gf.gg.gh.gi.gl.gm.gn.gp.gq.gr.gs.gt.gu.gw.gy.hk.hm.hn.hr.ht.hu.id.ie.il.im.in.io.iq.ir.is.it.je.jm.jo.jp.ke.kg.kh.ki.km.kn.kp.kr.kw.ky.kz.la.lb.lc.li.lk.lr.ls.lt.lu.lv.ly.ma.mc.md.me.mg.mh.mk.ml.mm.mn.mo.mp.mq.mr.ms.mt.mu.mv.mw.mx.my.mz.na.nc.ne.nf.ng.ni.nl.no.np.nr.nu.nz.om.pa.pe.pf.pg.ph.pk.pl.pm.pn.pr.ps.pt.pw.py.qa.re.ro.rs.ru.rw.sa.sb.sc.sd.se.sg.sh.si.sj.sk.sl.sm.sn.so.sr.st.su.sv.sx.sy.sz.tc.td.tf.tg.th.tj.tk.tl.tm.tn.to.tr.tt.tv.tw.tz.ua.ug.uk.us.uy.uz.va.vc.ve.vg.vi.vn.vu.wf.ws.ye.yt.za.zm.zw".split(".");
var TLD_url = "https://gitcdn.xyz/repo/umpirsky/tld-list/master/data/en/tld.json";
var TLD_cache = "./tld.json";
var TLD_refresh_msec = 15 * 24 * 60 * 60 * 1000;
var TLD_last_msec;
var TLD_default_filter=function(dom){return dom.substr(0,3)!="xn-"};


function getTLDs(cb,filter_func){

    if (typeof cb!=='function') return TLDs;

    var 
    read,fetch,
    CB_WRAP=function(tlds){
        return cb(
            filter_func===false
            ? cb(tlds)
            : tlds.filter(
                typeof filter_func==='function'
                 ? filter_func
                 : TLD_default_filter)
            );
    },
    check_mtime = function(mtime) {
       if (Date.now()-mtime > TLD_refresh_msec) {
           return fetch();
       } 
       if (TLDs) return CB_WRAP (TLDs);
       return read();
    };

    fetch = function(){

        getJSON(TLD_url,function(err,data){
            if (err) {
                console.log("exception in getTLDs.fetch:",err.message||err);
                return read(true);      
            } else {
                TLDs=Object.keys(data);

                fs.writeFile(TLD_cache,JSON.stringify(TLDs),function(err){
                    if (err) {
                        // ignore save error, we have the data
                        CB_WRAP(TLDs);
                    } else {
                        // get mmtime for the file we just made
                        fs.stat(TLD_cache,function(err,stats){
                            if (!err && stats) {
                               TLD_last_msec = stats.mtimeMs; 
                            }
                            CB_WRAP(TLDs);    
                        });
                    }
                });
            }
        });
    };

    read=function(bypassFetch) {
        fs.readFile(TLD_cache,'utf-8',function(err,json){

            try {
                if (err) {

                    if (bypassFetch) {
                        // after a http errror, we fallback to hardcoded basic list of tlds
                        // if the disk file is not readable
                        console.log("exception in getTLDs.read.bypassFetch:",err.messsage||err);    

                        throw err;
                    }
                    // if the disk read failed, get the data from the CDN server instead
                    return fetch();
                }

                TLDs=JSON.parse(json);
                if (bypassFetch) {
                    // we need to update stats here as fetch called us directly
                    // instead of being called by check_mtime
                    return fs.stat(TLD_cache,function(err,stats){
                        if (err) return fetch();
                        TLD_last_msec =stats.mtimeMs;
                        return CB_WRAP(TLDs);
                    });
                }

            } catch (e){
                // after JSON error, if we aren't in an http fail situation, refetch from cdn server
                if (!bypassFetch) {
                    return fetch();
                }

                // after a http,disk,or json parse error, we fallback to hardcoded basic list of tlds

                console.log("exception in getTLDs.read:",err.messsage||err);    
                TLDs=TLDs_fallback;
            }

            return CB_WRAP(TLDs);
        });
    };

    if (TLD_last_msec) {
        return check_mtime(TLD_last_msec);
    } else {
        fs.stat(TLD_cache,function(err,stats){
            if (err) return fetch();
            TLD_last_msec =stats.mtimeMs;
            return check_mtime(TLD_last_msec);
        });
    }
}

getTLDs(console.log.bind(console));
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜