https://www.endpointdev.com/blog/2016/07/scrape-web-content-with-php-no-api-no/
Windows 10 & 11 Sound Keyboard Application free download
https://www.benibela.de/sources_en.html#internettools
https://stackoverflow.com/questions/14691782/web-page-scraping-in-delphi
https://www.rosettacode.org/wiki/Web_scraping
http://videlibri.sourceforge.net/xidel.html
{ Main form unit of a small Lazarus application: one button starts a scrape of
  the page at baseUrl and one memo shows progress while the linked PDF files
  are downloaded. }
unit main;

{$mode objfpc}{$H+}

interface

uses
  Classes, SysUtils, Forms, Controls, Graphics, Dialogs, StdCtrls,
  // baseUrl is an https:// address. From FPC 3.2.0 on, TFPHTTPClient only has
  // an SSL socket handler available when opensslsockets is linked in; the
  // version guard keeps the unit compiling on FPC 3.0.x, where it does not exist.
  {$IF FPC_FULLVERSION >= 30200}opensslsockets,{$ENDIF}
  fphttpclient, regexpr;

type

  { TForm1 }

  TForm1 = class(TForm)
    Button1: TButton;   // starts the scrape (handler presumably wired in main.lfm)
    Memo1: TMemo;       // receives one progress line per download
    // Scans the page at baseUrl for book links and downloads each book.
    procedure Button1Click(Sender: TObject);
    // Fetches <baseUrl><bookname>/index.html and saves the PDF it points to.
    procedure downloadBook(bookname: String);
  private

  public

  end;

var
  Form1: TForm1;

implementation

{$R *.lfm}

{ TForm1 }

const
  // Root address of the site that is scraped; must end with a slash because
  // book names are appended to it directly.
  baseUrl = 'https://beogradsko.blogspot.com/';

var
targetDirectory: AnsiString;procedure TForm1.Button1Click(Sender: TObject);
var
page, bookname: AnsiString;
re: TRegExpr;
begin
targetDirectory := GetUserDir + 'downloads' + DirectorySeparator + 'GoalKickerBooks' + DirectorySeparator;if Not DirectoryExists(targetDirectory) then
CreateDir(targetDirectory);// Grab the base page
page := TFPHTTPClient.SimpleGet(baseUrl);// Find all book urls
re := TRegExpr.Create('<a href="([\w]+)/"');
try
if re.Exec(page) then begin
bookname := re.Match[1];
downloadBook(bookname);
while re.ExecNext do begin
bookname := re.Match[1];
downloadBook(bookname);
Application.ProcessMessages;
end;
end;Memo1.Append('');
Memo1.Append('All books downloaded');
finally
re.Free;
end;
end;procedure TForm1.downloadBook(bookname: String);
var
page: AnsiString;
re: TRegExpr;
begin
// Get page
page := TFPHTTPClient.SimpleGet(baseUrl + bookname + '/index.html');// Grab PDF url
re := TRegExpr.Create('location.href=''([\w]+.pdf)''');
try
if re.Exec(page) then begin
Memo1.Append('Downloading ' + baseUrl + bookname + '/' + re.Match[1]);
TFPHTTPClient.SimpleGet(baseUrl + bookname + '/' + re.Match[1], targetDirectory + re.Match[1]);
end;
finally
re.Free;
end;
end;end.
No comments:
Post a Comment
Коментар: