21
InfiniteScroll = {
\itemSelector scrollDelay removeOldElements.{
(Browser.ScrollLoop {
itemSelector
} {
(Sequence.First {
itemSelector
})
Browser.ScrollToBottom
(Browser.Wait {
scrollDelay
})
} {
removeOldElements
})
}
}
Main = {
Setup -> { startingUrl maxItems }
(Browser.Load {
startingUrl
})
(Sequence.Take {
maxItems
} {
(InfiniteScroll {
Select.RowContainer
} {
2000
} {
True
})
})
VideoDetails
}
Setup = {
(@Core.Unit #{
startingUrl: {
"https://www.youtube.com/results?search_query=web+scraping"
}
maxItems: {
500
}
})
}
VideoDetails = {
(@Core.Unit #{
title: {
Select.Title
}
videoUrl: {
Select.Title
Gather.Link
}
user: {
Select.User
}
userUrl: {
Select.User
Gather.Link
}
duration: {
Select.Duration
}
description: {
Select.Description
}
views: {
Select.Views
}
date: {
Select.Time
}
})
}
Main
title
String
videoUrl
String
user
String
userUrl
String
duration
String
description
String
views
String
date
String
Description
{
"classes": {
"val": "style-scope ytd-video-renderer"
},
"id1": {
"val": ""
},
"class2": {
"style-scope": true,
"ytd-video-renderer": true
},
"tag": {
"val": "YT-FORMATTED-STRING"
},
"class3": {
"style-scope": true,
"ytd-item-section-renderer": true
},
"font": {
"val": "Roboto, Arial, sans-serif"
},
"id2": {
"val": "dismissable"
},
"classes2": {
"val": "style-scope ytd-video-renderer"
},
"visible": {
"val": true
},
"classes1": {
"val": "text-wrapper style-scope ytd-video-renderer"
},
"classes3": {
"val": "style-scope ytd-item-section-renderer"
},
"id3": {
"val": ""
},
"tag2": {
"val": "DIV"
},
"id": {
"val": "description-text"
},
"color": {
"val": "rgb(96, 96, 96)"
},
"class": {
"style-scope": true,
"ytd-video-renderer": true
},
"class1": {
"text-wrapper": true,
"style-scope": true,
"ytd-video-renderer": true
},
"tag1": {
"val": "DIV"
},
"tag3": {
"val": "YTD-VIDEO-RENDERER"
},
"_tolerance": {
"val": 0.044
}
}
Duration
{
"classes": {
"val": "style-scope ytd-thumbnail-overlay-time-status-renderer"
},
"id1": {
"val": ""
},
"class2": {
"style-scope": true,
"ytd-thumbnail": true
},
"tag": {
"val": "SPAN"
},
"class3": {
"yt-simple-endpoint": true,
"inline-block": true,
"style-scope": true,
"ytd-thumbnail": true
},
"font": {
"val": "Roboto, Arial, sans-serif"
},
"id2": {
"val": "overlays"
},
"classes2": {
"val": "style-scope ytd-thumbnail"
},
"visible": {
"val": true
},
"classes1": {
"val": "style-scope ytd-thumbnail"
},
"classes3": {
"val": "yt-simple-endpoint inline-block style-scope ytd-thumbnail"
},
"id3": {
"val": "thumbnail"
},
"tag2": {
"val": "DIV"
},
"id": {
"val": ""
},
"color": {
"val": "rgb(255, 255, 255)"
},
"class": {
"style-scope": true,
"ytd-thumbnail-overlay-time-status-renderer": true
},
"class1": {
"style-scope": true,
"ytd-thumbnail": true
},
"tag1": {
"val": "YTD-THUMBNAIL-OVERLAY-TIME-STATUS-RENDERER"
},
"tag3": {
"val": "A"
},
"childrenLen": {
"val": 0
},
"_tolerance": {
"val": 0.044
}
}
RowContainer
{
"classes": {
"val": "style-scope ytd-item-section-renderer"
},
"id1": {
"val": "contents"
},
"class2": {
"style-scope": true,
"ytd-section-list-renderer": true
},
"tag": {
"val": "YTD-VIDEO-RENDERER"
},
"class3": {
"style-scope": true,
"ytd-section-list-renderer": true
},
"font": {
"val": "Roboto, Arial, sans-serif"
},
"id2": {
"val": ""
},
"classes2": {
"val": "style-scope ytd-section-list-renderer"
},
"visible": {
"val": true
},
"classes1": {
"val": "style-scope ytd-item-section-renderer"
},
"classes3": {
"val": "style-scope ytd-section-list-renderer"
},
"id3": {
"val": "contents"
},
"tag2": {
"val": "YTD-ITEM-SECTION-RENDERER"
},
"id": {
"val": ""
},
"color": {
"val": "rgb(0, 0, 0)"
},
"class": {
"style-scope": true,
"ytd-item-section-renderer": true
},
"class1": {
"style-scope": true,
"ytd-item-section-renderer": true
},
"tag1": {
"val": "DIV"
},
"tag3": {
"val": "DIV"
},
"childrenLen": {
"val": 2
},
"_tolerance": {
"val": 0.044
}
}
Time
{
"classes": {
"val": "style-scope ytd-video-meta-block"
},
"id1": {
"val": "metadata-line"
},
"class2": {
"style-scope": true,
"ytd-video-meta-block": true
},
"tag": {
"val": "SPAN"
},
"class3": {
"style-scope": true,
"ytd-video-renderer": true
},
"font": {
"val": "Roboto, Arial, sans-serif"
},
"id2": {
"val": "metadata"
},
"classes2": {
"val": "style-scope ytd-video-meta-block"
},
"visible": {
"val": true
},
"classes1": {
"val": "style-scope ytd-video-meta-block"
},
"id3": {
"val": ""
},
"tag2": {
"val": "DIV"
},
"id": {
"val": ""
},
"color": {
"val": "rgb(96, 96, 96)"
},
"indexes": {
"0": 5
},
"class": {
"style-scope": true,
"ytd-video-meta-block": true
},
"class1": {
"style-scope": true,
"ytd-video-meta-block": true
},
"tag1": {
"val": "DIV"
},
"tag3": {
"val": "YTD-VIDEO-META-BLOCK"
},
"childrenLen": {
"val": 0
},
"_tolerance": {
"val": 0.022
}
}
Title
{"classes":{"val":"yt-simple-endpoint style-scope ytd-video-renderer"},"id1":{"val":""},"class2":{"style-scope":true,"ytd-video-renderer":true},"tag":{"val":"A"},"class3":{"style-scope":true,"ytd-video-renderer":true},"font":{"val":"Roboto, Arial, sans-serif"},"id2":{"val":"title-wrapper"},"classes2":{"val":"style-scope ytd-video-renderer"},"visible":{"val":true},"classes1":{"val":"title-and-badge style-scope ytd-video-renderer"},"classes3":{"val":"style-scope ytd-video-renderer"},"id3":{"val":"meta"},"tag2":{"val":"DIV"},"id":{"val":"video-title"},"class":{"yt-simple-endpoint":true,"style-scope":true,"ytd-video-renderer":true},"class1":{"title-and-badge":true,"style-scope":true,"ytd-video-renderer":true},"tag1":{"val":"H3"},"tag3":{"val":"DIV"},"childrenLen":{"val":0}}
User
{
"classes": {
"val": "yt-simple-endpoint style-scope yt-formatted-string"
},
"id1": {
"val": "byline"
},
"class2": {
"style-scope": true,
"ytd-video-meta-block": true
},
"tag": {
"val": "A"
},
"class3": {
"style-scope": true,
"ytd-video-meta-block": true
},
"font": {
"val": "Roboto, Arial, sans-serif"
},
"id2": {
"val": "byline-inner-container"
},
"classes2": {
"val": "style-scope ytd-video-meta-block"
},
"visible": {
"val": true
},
"classes1": {
"val": "style-scope ytd-video-meta-block complex-string"
},
"classes3": {
"val": "style-scope ytd-video-meta-block"
},
"id3": {
"val": "byline-container"
},
"tag2": {
"val": "DIV"
},
"id": {
"val": ""
},
"color": {
"val": "rgb(96, 96, 96)"
},
"class": {
"yt-simple-endpoint": true,
"style-scope": true,
"yt-formatted-string": true
},
"class1": {
"style-scope": true,
"ytd-video-meta-block": true,
"complex-string": true
},
"tag1": {
"val": "YT-FORMATTED-STRING"
},
"tag3": {
"val": "DIV"
},
"childrenLen": {
"val": 0
},
"_tolerance": {
"val": 0.044
}
}
Views
{
"classes": {
"val": "style-scope ytd-video-meta-block"
},
"id1": {
"val": "metadata-line"
},
"class2": {
"style-scope": true,
"ytd-video-meta-block": true
},
"tag": {
"val": "SPAN"
},
"class3": {
"style-scope": true,
"ytd-video-renderer": true
},
"font": {
"val": "Roboto, Arial, sans-serif"
},
"id2": {
"val": "metadata"
},
"classes2": {
"val": "style-scope ytd-video-meta-block"
},
"visible": {
"val": true
},
"classes1": {
"val": "style-scope ytd-video-meta-block"
},
"id3": {
"val": ""
},
"tag2": {
"val": "DIV"
},
"id": {
"val": ""
},
"color": {
"val": "rgb(96, 96, 96)"
},
"indexes": {
"0": 2
},
"class": {
"style-scope": true,
"ytd-video-meta-block": true
},
"class1": {
"style-scope": true,
"ytd-video-meta-block": true
},
"tag1": {
"val": "DIV"
},
"tag3": {
"val": "YTD-VIDEO-META-BLOCK"
},
"childrenLen": {
"val": 0
},
"_tolerance": {
"val": 0.022
}
}
HTML
var result = function (element) {
return element.nodeType === 1 ? element.outerHTML : element.nodeValue;
};
result.noCache = false;
return result;
true
true
Link
var result = function (element) {
var upperTag = function (element) {
return element.tagName ? element.tagName.toUpperCase() : element.tagName;
};
var linkOf = function (element) {
return element && element.href && (element.protocol === "http:" || element.protocol === "https:" || element.protocol == "file:" || element.protocol == "ftp:") ? element.href : null;
};
var original = element;
while (element && (!element.tagName || upperTag(element) !== 'A'))
element = element.parentNode;
var result = linkOf(element);
if (!result) {
if (original.children.length == 1) {
var child = original.children[0];
if (upperTag(child) === 'A') {
result = linkOf(child);
if (!result) {
if (child.children.length == 1) {
child = child.children[0];
if (upperTag(child) === 'A')
result = linkOf(child);
}
}
}
}
}
return result;
};
result.noCache = false;
return result;
true
true
Number
var result = function (element) {
var getText = function () {
return element.nodeType === 1 ? element.innerText : element.nodeValue;
};
var regex = /-?(?=[1-9]|0(?!\d))[\d\,]+(\.\d+)?/;
var first = regex.exec(getText(element));
return first && first.length ? first[0] : null;
};
result.noCache = false;
return result;
false
true
OwnText
var result = function (element) {
var text = [].reduce.call(element.childNodes || [], (a, b) => a + (b.nodeType === 3 ? b.textContent : ''), '');
return text.trim();
};
result.noCache = false;
return result;
false
true
Path
var result = function (element) {
return window.___getPath(element);
};
result.noCache = false;
return result;
false
true
Src
var result = function (element) {
return element.src || "";
};
result.noCache = false;
return result;
false
true
Tag
var result = function (element) {
var upperTag = function (element) {
return element.nodeType === 1 ? element.tagName.toUpperCase() : "HS-NODE";
};
return upperTag(element);
};
result.noCache = false;
return result;
false
true
Text
var result = function (element) {
var text = element.nodeType === 1 ? element.innerText : element.nodeValue;
return text ? text.trim() : "";
};
result.noCache = false;
return result;
true
true
URL
var result = function (element) {
return element.ownerDocument.defaultView.location.href;
};
result.noCache = false;
return result;
false
true
Value
var result = function (element) {
return element.value || element.getAttribute('value');
};
result.noCache = false;
return result;
false
true
childrenLen
var result = function (element) {
return { val: element.nodeType === 1 ? element.children.length : 0 };
};
result.score = 0.7; // Default
return result;
false
true
class
var result = function (element) {
if (element.nodeType !== 1)
element = element.parentElement;
var result = {};
var list = element.classList;
var len = list.length;
for (var i = 0; i < len; i++)
result[list[i]] = true;
return result;
};
result.score = 0.73;
return result;
false
true
class1
var result = window.___makeRelatedGatherer('class', function (element) {
return element.parentNode;
});
result.score = 0.87;
return result;
false
true
class2
var result = window.___makeRelatedGatherer('class', function (element) {
return element.parentNode.parentNode;
});
result.score = 0.87;
return result;
false
true
class3
var result = window.___makeRelatedGatherer('class', function (element) {
return element.parentNode.parentNode.parentNode;
});
result.score = 0.9;
return result;
false
true
classes
var result = function (element) {
if (element.nodeType !== 1)
element = element.parentElement;
return { val: typeof element.className === 'string' ? element.className : null };
};
result.score = 0.91;
return result;
false
true
classes1
var result = window.___makeRelatedGatherer('classes', function (element) {
return element.parentNode;
});
result.score = 0.95;
return result;
false
true
classes2
var result = window.___makeRelatedGatherer('classes', function (element) {
return element.parentNode.parentNode;
});
result.score = 0.95;
return result;
false
true
classes3
var result = window.___makeRelatedGatherer('classes', function (element) {
return element.parentNode.parentNode.parentNode;
});
result.score = 0.81;
return result;
false
true
colIndex
var result = function (element) {
var maxUp = 5;
function closestTd(element) {
for (var up = 0; up < maxUp; up++) {
if (!element) return null;
if (/^t[dh]$/i.test(element.nodeName))
return element;
element = element.parentNode;
}
return null;
}
function colSpan(col) {
return parseInt((col && col.nodeType == 1 && /^t[dh]$/i.test(element.nodeName)) ? (col.getAttribute('colSpan') || 1) : 0);
}
function endIndex(element) {
var colPos = colSpan(element);
while (element) {
element = element.previousSibling;
colPos += colSpan(element);
}
return colPos;
}
element = closestTd(element);
if (element)
return { val: endIndex(element.previousSibling) };
else
return {};
};
result.score = 0.7; // Default
return result;
false
true
color
var result = function (element) {
if (element.nodeType !== 1)
element = element.parentElement;
return { val: window.getComputedStyle(element).color };
};
result.score = 1;
return result;
false
true
font
var result = function (element) {
if (element.nodeType !== 1)
element = element.parentElement;
return { val: window.getComputedStyle(element)['font-family'] };
};
result.score = 1;
return result;
false
true
header
var result = function header(element) {
var upperTag = function (element) {
return element.tagName ? element.tagName.toUpperCase() : element.tagName;
};
var getColumnIndex = function (e) {
var maxLevels = 3;
while (e && upperTag(e) !== 'TD' && maxLevels > 0) {
e = e.parentElement;
maxLevels--;
}
if (e && upperTag(e) === 'TD') {
var index = 0;
while ((e = e.previousElementSibling)) {
index += e.colSpan || 1;
}
return index;
}
else
return -1;
};
var colIndex = getColumnIndex(element);
if (colIndex !== -1) {
while (element && upperTag(element) !== 'TABLE')
element = element.parentElement;
if (element && upperTag(element) === 'TABLE') {
var tds = element.querySelectorAll('th,td');
var index = 0;
var latest = null;
for (var td of tds) {
if (index > colIndex)
return { val: (latest.innerText || "").trim() };
latest = td;
index += td.colSpan || 1;
}
}
}
return {};
};
result.score = 0.9;
return result;
false
true
id
var result = function (element) {
if (element.nodeType !== 1)
element = element.parentElement;
return { val: element.id };
};
result.score = 0.98;
return result;
false
true
id1
var result = window.___makeRelatedGatherer('id', function (element) {
return element.parentNode;
});
result.score = 0.91;
return result;
false
true
id2
var result = window.___makeRelatedGatherer('id', function (element) {
return element.parentNode.parentNode;
});
result.score = 0.91;
return result;
false
true
id3
var result = window.___makeRelatedGatherer('id', function (element) {
return element.parentNode.parentNode.parentNode;
});
result.score = 0.88;
return result;
false
true
indexes
var result = function (element) {
function getElementIndex(node) {
var index = 0;
while ((node = node.previousSibling)) {
index++;
}
return index;
}
var result = {};
var level = 0;
var levels = 3;
while (element && level < levels) {
result[level] = getElementIndex(element);
level++;
element = element.parentElement
}
return result;
};
result.score = 0.86;
return result;
false
true
names
var result = function (element) {
function getElementName(node) {
return node.nodeType === 1 ? (node.name || node.getAttribute('name')) : null;
}
var result = {};
var level = 0;
var levels = 3;
while (element && level < levels) {
var name = getElementName(element);
if (name) result[level] = name;
level++;
element = element.parentElement
}
return result;
};
result.score = 0.75;
return result;
false
true
prevText
var result = function (element) {
var maxDepth = 0;
var maxLength = 128;
while (element && !element.previousElementSibling && maxDepth > 0) {
element = element.parentElement;
maxDepth--;
}
if (element && element.previousElementSibling) {
var fullText = element.previousElementSibling.innerText;
if (fullText.length < maxLength)
return { val: fullText };
}
return null;
};
result.score = 0.7; // Default
return result;
false
true
tag
var result = function (element) {
var upperTag = function (element) {
return element.nodeType === 1 ? element.tagName.toUpperCase() : "HS-NODE";
};
return { val: upperTag(element) };
};
result.score = 1;
return result;
false
true
tag1
var result = window.___makeRelatedGatherer('tag', function (element) {
return element.parentNode;
});
result.score = 1;
return result;
false
true
tag2
var result = window.___makeRelatedGatherer('tag', function (element) {
return element.parentNode.parentNode;
});
result.score = 1;
return result;
false
true
tag3
var result = window.___makeRelatedGatherer('tag', function (element) {
return element.parentNode.parentNode.parentNode;
});
result.score = 1;
return result;
false
true
text
var result = function (element) {
if (element.nodeType !== 1)
return null;
if (window.___textTags.has(element.tagName.toUpperCase()))
return null;
var text = element.innerText;
if (text.length > 64)
return null;
return { val: text };
};
result.score = 0.1;
return result;
false
true
visible
var result = function (element) {
if (element.nodeType !== 1)
element = element.parentElement;
return { val: element.offsetParent !== null };
};
result.score = 1;
return result;
false
true
*://*.doubleclick.net/*
*://*.amazon-adsystem.com/*
*://*.google-analytics.com/*
*://*.googlesyndication.com/*
*://*.advertising.com/*
*://*.imrworldwide.com/*
*://*.quantserve.com/*
*://*.googleadservices.com/*
*://*.scorecardresearch.com/*
91b878acfc9885ee72e165c87b979412
CwPP8P7nJA2Wllegt+Ycpek4C2zKYU1EW/LbR3uVnhcT2kGm2waRKTCP+2rO4x1h2xkY3i9bwtP92Rux3Ww/9i9SkPCD+AsFmeEuqWvI3c8GrtYHBpVI5ol2L/OkKvYElXkq26QA8KO81UWiMzfEvOyHj1rSUWCfjmtmiP06eU9KyMcheIUZqj6hdPmHZkznCWu63TgkNfSh3ZVKfhRFqD6sUrun+etSo9K5QweaWU2SVLDWFzY4AjFoPZac9g1LVkIFOCagxX2zAp6Xbu/qEB/ceqzSsMTReztSkyXN5phTcnct8I3cBCf1DKXn8NJSYKU0EP4ggQ1SYVCBnvNU3g==