SharepointScrape/Program.cs
2024-08-10 23:40:02 -03:00

46 lines
1.6 KiB
C#

using PuppeteerSharp;
// Make sure a browser build is available locally before launching.
var browserFetcher = new BrowserFetcher();
await browserFetcher.DownloadAsync();

// `await using` guarantees the browser process is shut down when the program
// finishes, including when a later step throws; otherwise the launched
// process can be left running.
await using var browser = await Puppeteer.LaunchAsync(new LaunchOptions
{
    // Run with a visible window rather than headless.
    Headless = false
});
var page = await browser.NewPageAsync();
// Credentials are read from the environment so that no secret is stored in
// source control. Fail fast with a clear message when they are not provided.
var loginUser = System.Environment.GetEnvironmentVariable("SHAREPOINT_USERNAME");
var loginPassword = System.Environment.GetEnvironmentVariable("SHAREPOINT_PASSWORD");
if (string.IsNullOrWhiteSpace(loginUser) || string.IsNullOrEmpty(loginPassword))
{
    throw new System.InvalidOperationException(
        "Set the SHAREPOINT_USERNAME and SHAREPOINT_PASSWORD environment variables before running.");
}

// Opening the target page lands on the sign-in form first.
await page.GoToAsync("https://domvsitcom.sharepoint.com/sites/Comunica/SitePages/Portfólio-de-Serviços.aspx");

// Step 1: account name. Every call is awaited (instead of blocking with
// .Wait()) so failures surface as the original exception rather than an
// AggregateException, and no thread is blocked.
await page.WaitForSelectorAsync("input[name='loginfmt']");
await page.TypeAsync("input[name='loginfmt']", loginUser);
await page.ClickAsync("input[type='submit']");

// Step 2: password. Wait until the password field is rendered before typing,
// because the form changes after the first submit.
await page.WaitForSelectorAsync("input[name='passwd']", new WaitForSelectorOptions { Visible = true });
await page.TypeAsync("input[name='passwd']", loginPassword);
await page.ClickAsync("input[type='submit']");

// Allow up to 60 seconds for the sign-in to finish and the services section to appear.
await page.WaitForSelectorAsync("div[title='Serviços']", new WaitForSelectorOptions { Timeout = 60000 });
// Collect the link target of every row in the list rendered on the page.
var list = await page.QuerySelectorAllAsync("div[class='ms-List-page'] > div[role='presentation']");
var gotoList = new List<string>();
var content = new Dictionary<string, string>();
foreach (var item in list)
{
    // QuerySelectorAsync returns null when the row has no matching anchor;
    // skip such rows instead of failing with a NullReferenceException.
    var anchorHandle = await item.QuerySelectorAsync("div[role='listitem'] > a");
    if (anchorHandle is null)
    {
        continue;
    }

    // Read the anchor's href as a typed string; ignore anchors without a usable URL.
    var hrefHandle = await anchorHandle.GetPropertyAsync("href");
    var href = await hrefHandle.JsonValueAsync<string>();
    if (!string.IsNullOrWhiteSpace(href))
    {
        gotoList.Add(href);
    }
}
// Visit every collected URL and store its main-region text, keyed by URL.
foreach (var url in gotoList)
{
    // The same link may appear more than once; Dictionary.Add would throw on
    // the second occurrence, so skip URLs that were already scraped.
    if (content.ContainsKey(url))
    {
        continue;
    }

    await page.GoToAsync(url);

    // QuerySelectorAsync returns null when the page has no main region.
    var mainHandle = await page.QuerySelectorAsync("div[role='main']");
    if (mainHandle is null)
    {
        System.Console.Error.WriteLine($"Skipping {url}: no div[role='main'] found.");
        continue;
    }

    var innerTextHandle = await mainHandle.GetPropertyAsync("innerText");
    var pageText = await innerTextHandle.JsonValueAsync<string>() ?? string.Empty;

    // The title is the first line of the text. When there is no line break
    // IndexOf returns -1, which would make Substring throw, so fall back to
    // using the whole text as the title.
    var lineBreakIndex = pageText.IndexOf('\n');
    var title = lineBreakIndex >= 0 ? pageText.Substring(0, lineBreakIndex) : pageText;

    content.Add(url, $"**{title}** \n\r - {pageText} ");
}