SharepointScrape/Program.cs
2024-08-11 21:55:17 -03:00

61 lines
2.2 KiB
C#

using Microsoft.Extensions.Primitives;
using PuppeteerSharp;
using System.IO;
using System.Text;
// Signs in to SharePoint, collects the links listed on the "Portfólio de Serviços" page,
// visits each one and writes the text of its main region to "Servicos.txt" in the
// directory of the executing assembly.
//
// Credentials are read from the SHAREPOINT_USER and SHAREPOINT_PASSWORD environment
// variables so that no secret lives in source control. Any password that was previously
// committed in this file should be treated as compromised and rotated.

const string PortfolioUrl = "https://domvsitcom.sharepoint.com/sites/Comunica/SitePages/Portfólio-de-Serviços.aspx";
const string UserVariable = "SHAREPOINT_USER";
const string PasswordVariable = "SHAREPOINT_PASSWORD";
const string OutputFileName = "Servicos.txt";

// Fail fast, before the browser download, when the credentials are missing.
var userName = RequireEnvironmentVariable(UserVariable);
var password = RequireEnvironmentVariable(PasswordVariable);

var browserFetcher = new BrowserFetcher();
await browserFetcher.DownloadAsync();

// `await using` disposes (and therefore closes) the browser on every exit path,
// including when one of the steps below throws.
await using var browser = await Puppeteer.LaunchAsync(new LaunchOptions
{
    Headless = false
});
var page = await browser.NewPageAsync();

// --- Sign in -----------------------------------------------------------------------
await page.GoToAsync(PortfolioUrl);
await page.WaitForSelectorAsync("input[name='loginfmt']");
await page.TypeAsync("input[name='loginfmt']", userName);
await page.ClickAsync("input[type='submit']");
// Wait for the password field instead of typing into a page that may still be changing.
await page.WaitForSelectorAsync("input[name='passwd']");
await page.TypeAsync("input[name='passwd']", password);
// NOTE(review): the password form is never submitted by this script; presumably the
// operator completes the sign-in by hand in the visible browser window within the
// 60 second wait below — confirm.
await page.WaitForSelectorAsync("div[title='Serviços']", new WaitForSelectorOptions { Timeout = 60000 });

// --- Collect the links to visit ----------------------------------------------------
var listItems = await page.QuerySelectorAllAsync("div[class='ms-List-page'] > div[role='presentation']");
var gotoList = new List<string>();
var seenLinks = new HashSet<string>();
foreach (var listItem in listItems)
{
    var anchor = await listItem.QuerySelectorAsync("div[role='listitem'] > a");
    if (anchor is null)
    {
        // Entry without a link: nothing to visit.
        continue;
    }

    var hrefHandle = await anchor.GetPropertyAsync("href");
    var href = (await hrefHandle.JsonValueAsync())?.ToString();

    // Keep the first occurrence of each link, in page order.
    if (!string.IsNullOrWhiteSpace(href) && seenLinks.Add(href))
    {
        gotoList.Add(href);
    }
}

// --- Visit every link and accumulate its text --------------------------------------
var builder = new StringBuilder();
foreach (var url in gotoList)
{
    await page.GoToAsync(url);
    var mainRegion = await page.QuerySelectorAsync("div[role='main']");
    if (mainRegion is null)
    {
        Console.Error.WriteLine($"Skipping {url}: no div[role='main'] element found.");
        continue;
    }

    var innerTextHandle = await mainRegion.GetPropertyAsync("innerText");
    var pageText = (await innerTextHandle.JsonValueAsync())?.ToString() ?? string.Empty;

    // Every entry after the first is preceded by the separator.
    // NOTE(review): the separator is "\n\r" repeated (LF then CR), kept byte-for-byte
    // from the previous version; "\r\n" may have been intended — confirm.
    if (builder.Length > 0)
    {
        builder.Append("\n\r\n\r\n\r");
    }

    builder.Append(FormatPage(pageText));
}

// --- Write the result --------------------------------------------------------------
var assemblyPath = System.Reflection.Assembly.GetExecutingAssembly().Location;
var outputDirectory = Path.GetDirectoryName(assemblyPath);
if (string.IsNullOrEmpty(outputDirectory))
{
    // Assembly.Location can be empty (e.g. single-file publish); fall back to the app base directory.
    outputDirectory = AppContext.BaseDirectory;
}

await File.WriteAllTextAsync(Path.Combine(outputDirectory, OutputFileName), builder.ToString());

// Returns the value of the named environment variable, or throws
// InvalidOperationException when it is unset or empty.
static string RequireEnvironmentVariable(string name)
{
    var value = Environment.GetEnvironmentVariable(name);
    if (string.IsNullOrEmpty(value))
    {
        throw new InvalidOperationException($"Environment variable '{name}' must be set.");
    }

    return value;
}

// Formats one page's text as "**title** \n\n\t body ", where the title is the first line
// of pageText and the body is the remainder with every line break followed by "\t ".
// Text without a line break becomes a title with an empty body.
static string FormatPage(string pageText)
{
    var firstBreak = pageText.IndexOf('\n');

    string title;
    string body;
    if (firstBreak < 0)
    {
        title = pageText;
        body = string.Empty;
    }
    else
    {
        title = pageText.Substring(0, firstBreak);
        // The body starts two characters after the title, as in the previous version
        // (presumably skipping a blank line — confirm); clamped so short text cannot overrun.
        var bodyStart = Math.Min(firstBreak + 2, pageText.Length);
        body = pageText.Substring(bodyStart);
    }

    body = body.Replace("\n", "\n\t ");
    return $"**{title}** \n\n\t {body} ";
}