Showing posts with label link. Show all posts
Showing posts with label link. Show all posts

Thursday, December 9, 2021

Simple PHP web scraping link

 https://www.endpointdev.com/blog/2016/07/scrape-web-content-with-php-no-api-no/


Windows 10 & 11 Sound Keyboard Application free download

https://www.benibela.de/sources_en.html#internettools

https://stackoverflow.com/questions/14691782/web-page-scraping-in-delphi

https://www.rosettacode.org/wiki/Web_scraping

http://videlibri.sourceforge.net/xidel.html




{ Main form unit of a small Lazarus application.

  The form holds one button and one memo. Button1Click fetches the page at
  baseUrl and calls downloadBook for every link it finds; downloadBook saves
  the referenced PDF into targetDirectory and logs to Memo1.

  NOTE(review): baseUrl is an https URL. Depending on the FPC version,
  TFPHTTPClient may need the opensslsockets unit in the uses clause for
  HTTPS to work -- confirm against the compiler version in use. }
unit main;

{$mode objfpc}{$H+}

interface

uses
  Classes, SysUtils, Forms, Controls, Graphics, Dialogs, StdCtrls,
  fphttpclient, regexpr;

type

  { TForm1 }

  TForm1 = class(TForm)
    Button1: TButton;
    Memo1: TMemo;
    // Click handler: scans the page at baseUrl and downloads each book found.
    procedure Button1Click(Sender: TObject);
    // Downloads the PDF referenced by <baseUrl><bookname>/index.html.
    procedure downloadBook(bookname: String);
  private
  public
  end;

var
  Form1: TForm1;

implementation

{$R *.lfm}

{ TForm1 }

const
  // Root URL that is scanned for book links; must end with a slash because
  // book paths are appended to it directly.
  baseUrl = 'https://beogradsko.blogspot.com/';

var
  // Destination folder (with trailing separator) for downloaded files.
  // Assigned by TForm1.Button1Click before any download starts.
  targetDirectory: AnsiString;
{ Click handler for Button1.

  Prepares the download folder (<user dir>/downloads/GoalKickerBooks/),
  fetches the page at baseUrl, and calls downloadBook for every
  '<a href="name/"' link found in it. Progress is written to Memo1.

  Exceptions raised by the HTTP client propagate to the caller. }
procedure TForm1.Button1Click(Sender: TObject);
var
  page, bookname: AnsiString;
  re: TRegExpr;
begin
  targetDirectory := GetUserDir + 'downloads' + DirectorySeparator + 'GoalKickerBooks' + DirectorySeparator;

  // ForceDirectories also creates missing parent folders (CreateDir only
  // creates the last path component) and returns True when the folder
  // already exists.
  if not ForceDirectories(targetDirectory) then
  begin
    Memo1.Append('Cannot create directory ' + targetDirectory);
    Exit;
  end;

  // Application.ProcessMessages below lets the UI handle clicks while we are
  // still running; disable the button so a second click cannot start a
  // nested, overlapping download run.
  Button1.Enabled := False;
  try
    // Grab the base page
    page := TFPHTTPClient.SimpleGet(baseUrl);

    // Find all book urls
    re := TRegExpr.Create('<a href="([\w]+)/"');
    try
      if re.Exec(page) then
        repeat
          bookname := re.Match[1];
          downloadBook(bookname);
          // Keep the form responsive and let Memo1 repaint between books.
          Application.ProcessMessages;
        until not re.ExecNext;

      Memo1.Append('');
      Memo1.Append('All books downloaded');
    finally
      re.Free;
    end;
  finally
    Button1.Enabled := True;
  end;
end;
{ Downloads the PDF belonging to one book.

  Fetches <baseUrl><bookname>/index.html, looks for a
  location.href='<name>.pdf' assignment in it and, when found, saves that
  file into targetDirectory under the same file name. Does nothing when the
  page contains no such assignment.

  bookname: path segment of the book below baseUrl (no slashes).

  Relies on the unit-level targetDirectory having been set by the caller.
  Exceptions raised by the HTTP client propagate to the caller. }
procedure TForm1.downloadBook(bookname: String);
var
  page, bookUrl, pdfName: AnsiString;
  re: TRegExpr;
begin
  bookUrl := baseUrl + bookname + '/';

  // Get page
  page := TFPHTTPClient.SimpleGet(bookUrl + 'index.html');

  // Grab PDF url. The dots are escaped so they match a literal '.' only;
  // unescaped, '.pdf' would also accept names such as 'fooXpdf'.
  re := TRegExpr.Create('location\.href=''([\w]+\.pdf)''');
  try
    if re.Exec(page) then
    begin
      // The capture is limited to \w characters plus '.pdf', so it is safe
      // to use directly as a local file name.
      pdfName := re.Match[1];
      Memo1.Append('Downloading ' + bookUrl + pdfName);
      TFPHTTPClient.SimpleGet(bookUrl + pdfName, targetDirectory + pdfName);
    end;
  finally
    re.Free;
  end;
end;
end.