javascript 点击所有链接下载PDF(发票)
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了javascript 点击所有链接下载PDF(发票)相关的知识,希望对你有一定的参考价值。
/**
* NOTE: You must turn on downloading (instead of opening) of PDFs here:
* chrome://settings/content/pdfDocuments
*
* Clicks all links on a page to trigger them to download.
* Only works in Chrome (Firefox and Safari don't download the files).
* To use: Paste into the Console tab (right-click -> Inspect Element).
*
* Chrome stops adding a unique number to file names after 100 files.
* So this pauses every 100 links to allow all downloaded files to be moved elsewhere.
* Pauses with a semi-random delay between clicks to help avoid tripping bot detection.
*
* dave@drw.me.uk
*/
/* Download function V1
Using this way of downloading results in two things:
1) Many skipped downloads if the pause is less than 1 second. (iframe method works fine for this).
2) Even at 1 second pauses, occasional skipped downloads.
Using the iFrame way appears to avoid the skips but since the problem is sporadic, it's difficult to prove that it's a fix.
However I think that it's the uniqueness of the iFrames (each has a unique ID) which stops one slow call being over-written by another.
So it's possible that simply giving the <a> tag a unique ID would still work fine, but I've not had time to test it.
function downloadLink(uri, name)
{
var link = document.createElement("a");
link.setAttribute("id", name);
link.download = name; // Doesn't rename in Chrome currently
link.href = uri;
document.body.appendChild(link);
if (!testMode) {
link.click();
}
}
*/
/* Download function V2 Oct 2018 this used to work, and work really fast... but now fails silently :(
function downloadLink(url, id) {
var ifrm = document.createElement('iframe');
ifrm.setAttribute("id", id);
ifrm.setAttribute("src", url);
if (!testMode) {
document.body.appendChild(ifrm); // Triggers the actual download
}
}
*/
/* Download function V3
Another potential way (which doesn't work):
From here: https://bugs.chromium.org/p/chromium/issues/detail?id=822542#c12
But without the sandbox call (since that stops it cold).
function downloadLink(url, id) {
const iframe = document.createElement('iframe');
iframe.style.setProperty('display', 'none', 'important');
// iframe.sandbox = 'allow-modals allow-scripts';
iframe.onload = () => {
setTimeout(() => iframe.remove());
};
iframe.srcdoc = `<body><script>{
const link = document.body.appendChild(document.createElement('a'));
link.download = '';
link.href = JSON.parse('${JSON.stringify(url)}');
link.dispatchEvent(new MouseEvent('click'));
}<${'/'}script></body>`;
document.body.appendChild(iframe);
}
*/
/* Download function V4
Another potential way (which doesn't work!):
MDN docs on the download API: https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/downloads
However the examples use the "browser" object and that's a Firefox thing.
The examples then clarify that you can polyfill Chrome: https://github.com/mdn/webextensions-examples/blob/master/README.md
But the polyfill seems to be an NPM module?? https://github.com/mozilla/webextension-polyfill
*/
// Currently using this way of downloading:
/**
 * Builds an <a> element for the given address, parks it inside the
 * "download-link-holder" element and, unless testMode is set, clicks it.
 *
 * @param {string} uri  Address assigned to the anchor's href.
 * @param {string} name Used as the anchor's id and, prefixed with "name_",
 *                      as its download attribute.
 */
function downloadLink(uri, name)
{
    // Beware: creating and attaching an anchor makes the "links" variable grow
    // (links.length keeps incrementing), which is rather unexpected.
    var anchor = document.createElement("a");
    anchor.setAttribute("id", name);
    anchor.download = "name_" + name;
    anchor.href = uri;

    document.getElementById("download-link-holder").appendChild(anchor);

    if (testMode) {
        return; // Debugging run: the anchor is attached but never clicked
    }
    anchor.click();
}
/**
 * Handles the link at offset i and then schedules the following link after a
 * semi-random pause.
 *
 * Reads the globals links, linkCount, maxLinksToClick, startAfterLink,
 * batchSize, beepXLinksBeforeEnd and pauseBetweenClicks, and calls
 * downloadLink(), beep(), prompt() and setTimeout().
 *
 * @param {number} i Zero-based offset into "links".
 * @returns {number|undefined} The 1-based number of the link when it was
 *     skipped (this is also the next offset to try); undefined in every other
 *     case (finished, aborted, or the next link has been scheduled).
 */
function processLink(i)
{
    // ">=" rather than "===" so that links[i] is never read past the saved count
    if (i >= linkCount) {
        console.log("All links clicked. Done!");
        beep();
        return; // Not returning a value here since the "do/while" loop is not involved once there is nothing more to skip
    }
    var link = links[i].getAttribute('href');
    var linkNumber = i + 1;
    if (linkNumber > maxLinksToClick) {
        console.log("Max number of links (" + maxLinksToClick + ") was reached. Exiting...");
        return;
    }
    if (linkNumber <= startAfterLink) {
        console.log("Skipping link " + linkNumber + ": " + link);
        return linkNumber;
    }
    if (link === null || link === "") {
        // An anchor without a usable href would otherwise be "downloaded" from a bogus address
        console.log("Link " + linkNumber + " has no href - nothing to download");
    } else {
        console.log("Link " + linkNumber + ": " + link);
        downloadLink(link, "link-number-" + linkNumber); // TODO if Chrome ever uses the name; we could pull the order and date from the spreadsheet and use it here
    }
    if (i > 0 && (linkNumber + beepXLinksBeforeEnd) % batchSize === 0) {
        beep(); beep(); beep(); beep(); // Long beep!
    }
    if (i > 0 && linkNumber % batchSize === 0) {
        console.log("Pausing after link " + linkNumber);
        // Prompt will not appear if the window doesn't have focus! https://www.chromestatus.com/feature/5637107137642496
        var promptResult = prompt("Pausing after link " + linkNumber + ". To repeat the last batch of " + batchSize + ", type \"R\".", link === null ? "" : link);
        if (promptResult === null) {
            console.log("Cancel button clicked - exiting");
            return; // Abort, they hit Cancel
        } else if (promptResult === "r" || promptResult === "R") {
            // Never rewind into the skipped region: a skipped link returns early
            // without scheduling its successor, and nothing consumes that return
            // value inside a timer callback, so the run would stop silently.
            var firstOffset = Math.max(0, Math.floor(startAfterLink));
            linkNumber = Math.max(linkNumber - batchSize, firstOffset);
            console.log("Request to repeat the last batch. Reset link count to " + linkNumber);
        }
    }
    setTimeout(function() {
        processLink(linkNumber); // linkNumber is also the next offset in the array
    }, Math.floor(Math.random() * 100) + pauseBetweenClicks); // Random click intervals to attempt to avoid bot detection
}
/**
 * Plays a short tone to attract the operator's attention.
 *
 * Uses a Web Audio oscillator rather than an embedded base64 sound clip, so
 * there is no multi-kilobyte string literal that breaks when the script is
 * copied and pasted. One audio context is created on first use and kept on
 * beep.audioContext for later calls. If Web Audio is unavailable or throws,
 * a console message is written instead and the caller carries on.
 */
function beep() {
    var ContextCtor = null;
    if (typeof AudioContext !== "undefined") {
        ContextCtor = AudioContext;
    } else if (typeof webkitAudioContext !== "undefined") {
        ContextCtor = webkitAudioContext;
    }
    if (ContextCtor === null) {
        console.log("Beep! (Web Audio is not available, so no sound was played)");
        return;
    }
    try {
        if (!beep.audioContext) {
            beep.audioContext = new ContextCtor();
        }
        var ctx = beep.audioContext;
        // A context created before any user gesture may start out suspended
        if (ctx.state === "suspended" && typeof ctx.resume === "function") {
            ctx.resume();
        }
        var oscillator = ctx.createOscillator();
        var volume = ctx.createGain();
        oscillator.type = "sine";
        oscillator.frequency.value = 880; // Hz
        volume.gain.value = 0.2; // Several beeps can overlap; keep each one quiet
        oscillator.connect(volume);
        volume.connect(ctx.destination);
        oscillator.start();
        oscillator.stop(ctx.currentTime + 0.2); // Seconds
    } catch (err) {
        console.log("Beep failed: " + err);
    }
}
/**** Entry point to the processing ****/
// ---- Settings (read by downloadLink and processLink) ----
var testMode = false; // true = debugging run: links are built but never clicked, so nothing downloads
var startAfterLink = 0; // Normally 0. After e.g. 200 good downloads and a mistake, temporarily set to 200
var batchSize = 100; // Chrome stops making file names unique after 100, hence batching (handy for testing too)
var maxLinksToClick = 200000; // Sanity limit only. Skipped links count towards it.
var pauseBetweenClicks = 1500; // Milliseconds. Below about 1.5 seconds Chrome ignores or occasionally skips clicks
var beepXLinksBeforeEnd = 10;

// ---- Holder for the generated <a> tags ----
// Removing each generated tag straight after its download would be tidier, but
// no neat way to do that was found. Left in place they are harmless within one
// run; on a second run without a page reload they would be picked up by the
// "links" variable, so an existing holder is emptied here.
var holder = document.getElementById("download-link-holder");
if (holder !== null) {
    // Left over from an earlier run on this page: discard its contents
    holder.innerHTML = '';
} else {
    holder = document.createElement("div");
    holder.setAttribute("id", "download-link-holder");
    document.body.appendChild(holder);
}

// ---- Collect the links ----
var links = document.getElementsByTagName('a');
var linkCount = links.length; // Must be captured before any <a> tags are appended

// ---- Ask where to start ----
var validStartPoint = true;
var promptStartPoint = prompt(
    "Processing " + linkCount + " links. Starting at link number " + (startAfterLink + 1) +
    ", in batches of " + batchSize +
    ".\nIf you've already downloaded the first two batches and want to start at the third, enter at " +
    ((batchSize * 2) + 1) + ":",
    (startAfterLink + 1)
); // Note: "startAfterLink", not "start at link"!
if (promptStartPoint === null) {
    console.log("Cancel button clicked - exiting");
    validStartPoint = false; // Cancel was pressed
} else if (!promptStartPoint || isNaN(promptStartPoint)) {
    validStartPoint = false; // Empty or non-numeric answer
} else {
    startAfterLink = promptStartPoint - 1;
}

// ---- Run ----
if (validStartPoint && linkCount) {
    // processLink returns a number only while it is still skipping links, so
    // this loop ends as soon as the first real download has been started.
    var skipped = processLink(0);
    while (skipped > 0 && skipped < maxLinksToClick) {
        skipped = processLink(skipped);
    }
} else if (validStartPoint) {
    console.log('Error: Cannot find any links...');
} else {
    console.log("Start point is not valid. Exiting.");
}
以上是关于javascript 点击所有链接下载PDF(发票)的主要内容,如果未能解决你的问题,请参考以下文章