Skip to content

Commit 932e145

Browse files
Stabilized v1.2.3
2 parents 2c62796 + 01667de commit 932e145

9 files changed

+552
-10
lines changed

README.md

+8-1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ DPULSE is a software solution for conducting OSINT research in relation to a cer
7878
- SecurityTrails API (deep subdomains and DNS enumeration)
7979
- HudsonRock API (for querying a database with exposed computers which were compromised through global info-stealer campaigns)
8080

81+
5. ***Web page snapshotting:*** extended functionality that saves copies of web pages in different forms:
82+
- Screenshot snapshotting (saves the target domain's page in the form of a screenshot)
83+
- HTML snapshotting (saves the target domain's page in the form of an HTML file)
84+
- Wayback Machine snapshotting (saves every version of target domain's page within a user-defined time period)
85+
8186
Finally, DPULSE compiles all found data into an easy-to-read HTML or XLSX report by category. It also saves all information about the scan in a local report storage database, from which reports can be restored later.
8287

8388
# How to install and run DPULSE
@@ -166,7 +171,7 @@ If you have problems with starting installer.sh, you should try to use `dos2unix
166171
# Tasks to complete before new release
167172
- [x] Add web pages snapshotting (with screenshots)
168173
- [x] Add web pages snapshotting (with web pages copying as HTML objects)
169-
- [ ] Add web pages snapshoting (with Wayback Machine)
174+
- [x] Add web pages snapshotting (with Wayback Machine)
170175

171176
# DPULSE mentions in social media
172177

@@ -176,6 +181,8 @@ If you have problems with starting installer.sh, you should try to use `dos2unix
176181

177182
### [The very first mention from cybercrime intelligence company (HudsonRock)](https://www.linkedin.com/feed/update/urn:li:share:7294336938495385600/)
178183

184+
### [The very first mention on cybersecurity educational website (Ethical Hackers Academy)](https://ethicalhacksacademy.com/blogs/cyber-security-tools/dpulse)
185+
179186
## X.com mentions:
180187

181188
### [by @DarkWebInformer](https://x.com/DarkWebInformer/status/1787583156775759915?t=Ak1W9ddUPpDvLAkVyQG8fQ&s=19)

datagather_modules/data_assembler.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from screen_snapshotting import take_screenshot
1717
from config_processing import read_config
1818
from html_snapshotting import save_page_as_html
19+
from archive_snapshotting import download_snapshot
1920

2021
try:
2122
import requests
@@ -72,7 +73,7 @@ def report_preprocessing(self, short_domain, report_file_type):
7273
os.makedirs(report_folder, exist_ok=True)
7374
return casename, db_casename, db_creation_date, robots_filepath, sitemap_filepath, sitemap_links_filepath, report_file_type, report_folder, files_ctime, report_ctime
7475

75-
def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, keywords, keywords_flag, dorking_flag, used_api_flag, snapshotting_flag, username):
76+
def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, keywords, keywords_flag, dorking_flag, used_api_flag, snapshotting_flag, username, from_date, end_date):
7677
casename, db_casename, db_creation_date, robots_filepath, sitemap_filepath, sitemap_links_filepath, report_file_type, report_folder, ctime, report_ctime = self.report_preprocessing(short_domain, report_file_type)
7778
logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} STARTS HERE')
7879
print(Fore.GREEN + "Started scanning domain" + Style.RESET_ALL)
@@ -186,6 +187,8 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
186187
take_screenshot(installed_browser, url, report_folder + '//screensnapshot.png')
187188
elif snapshotting_flag.lower() == 'p':
188189
save_page_as_html(url, report_folder + '//domain_html_copy.html')
190+
elif snapshotting_flag.lower() == 'w':
191+
download_snapshot(short_domain, from_date, end_date, report_folder)
189192
print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL)
190193
else:
191194
pass
@@ -269,6 +272,8 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
269272
take_screenshot(installed_browser, url, report_folder + '//screensnapshot.png')
270273
elif snapshotting_flag.lower() == 'p':
271274
save_page_as_html(url, report_folder + '//domain_html_copy.html')
275+
elif snapshotting_flag.lower() == 'w':
276+
download_snapshot(short_domain, from_date, end_date, report_folder)
272277
print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL)
273278
else:
274279
pass

dpulse.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,17 @@
5353
cli = cli_init.Menu()
5454
cli.welcome_menu()
5555

56-
def process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username):
56+
def process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username, from_date, end_date):
5757
import xlsx_report_creation as xlsx_rc
5858
import html_report_creation as html_rc
5959
from misc import time_processing
6060

6161
try:
6262
start = time()
6363
if pagesearch_flag in ['y', 'si']:
64-
data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), keywords_list, keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username)
64+
data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), keywords_list, keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username, from_date, end_date)
6565
else:
66-
data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), '', keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username)
66+
data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), '', keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username, from_date, end_date)
6767
end = time() - start
6868
endtime_string = time_processing(end)
6969

@@ -164,7 +164,7 @@ def run():
164164
else:
165165
print(Fore.RED + "\nInvalid API usage mode" + Style.RESET_ALL)
166166
break
167-
snapshotting_flag = input(Fore.YELLOW + "Select Snapshotting mode [S(creenshot)/P(age Copy)/N (for None)] >> ")
167+
snapshotting_flag = input(Fore.YELLOW + "Select Snapshotting mode [S(creenshot)/P(age Copy)/W(ayback Machine)/N (for None)] >> ")
168168
if pagesearch_flag.lower() == 'y' or pagesearch_flag.lower() == 'n':
169169
if pagesearch_flag.lower() == "n":
170170
pagesearch_ui_mark = 'No'
@@ -196,11 +196,16 @@ def run():
196196
break
197197
else:
198198
snapshotting_ui_mark = 'No'
199+
from_date = end_date = 'N'
199200
if snapshotting_flag.lower() == 's':
201+
from_date = end_date = 'N'
200202
snapshotting_ui_mark = "Yes, domain's main page snapshotting as a screenshot"
201203
elif snapshotting_flag.lower() == 'p':
204+
from_date = end_date = 'N'
202205
snapshotting_ui_mark = "Yes, domain's main page snapshotting as a .HTML file"
203206
elif snapshotting_flag.lower() == 'w': # not supported at the moment
207+
from_date = str(input('Enter start date (YYYYMMDD format): '))
208+
end_date = str(input('Enter end date (YYYYMMDD format): '))
204209
snapshotting_ui_mark = "Yes, domain's main page snapshotting using Wayback Machine"
205210
cli_init.print_prescan_summary(short_domain, report_filetype.upper(), pagesearch_ui_mark, dorking_ui_mark, used_api_ui, case_comment, snapshotting_ui_mark)
206211
print(Fore.LIGHTMAGENTA_EX + "[BASIC SCAN START]\n" + Style.RESET_ALL)
@@ -209,7 +214,7 @@ def run():
209214
if report_filetype.lower() in ['html', 'xlsx']:
210215
process_report(report_filetype, short_domain, url, case_comment,
211216
keywords_list, keywords_flag, dorking_flag, used_api_flag,
212-
pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username)
217+
pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username, from_date, end_date)
213218
else:
214219
print(Fore.RED + "\nUnsupported PageSearch mode. Please choose between Y or N")
215220

service/cli_init.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def welcome_menu(self):
2020
fig = Figlet(font=wm_font)
2121
print('\n')
2222
self.console.print(fig.renderText('DPULSE'), style=preview_style)
23-
print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2.2 stable] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL)
23+
print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2.3 stable] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL)
2424
print(Fore.MAGENTA + Style.BRIGHT + '[Visit our pages]\nGitHub repository: https://github.com/OSINT-TECHNOLOGIES\nPyPi page: https://pypi.org/project/dpulse/\nDocumentation: https://dpulse.readthedocs.io' + Style.RESET_ALL)
2525

2626
def print_main_menu(self):

service/config_processing.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@ def create_config():
2727
]
2828

2929
config = configparser.ConfigParser()
30+
config['HTML_REPORTING'] = {'template': 'default'}
3031
config['LOGGING'] = {'log_level': 'info'}
3132
config['CLI VISUAL'] = {'preview_color': 'red', 'font': 'slant'}
3233
config['DORKING'] = {'dorking_delay (secs)': '2', 'delay_step': '5'}
33-
config['SNAPSHOTTING'] = {'installed_browser': 'firefox', 'opera_browser_path': 'None'}
34+
config['SNAPSHOTTING'] = {'installed_browser': 'firefox', 'opera_browser_path': 'None', 'wayback_retries': '3', 'wayback_req_pause': '2'}
3435
config['USER-AGENTS'] = {}
3536
for i, agent in enumerate(basic_user_agents):
3637
config['USER-AGENTS'][f'agent_{i + 1}'] = agent
@@ -56,6 +57,9 @@ def read_config():
5657
proxies_file_path = config.get('PROXIES', 'proxies_file_path')
5758
installed_browser = config.get('SNAPSHOTTING', 'installed_browser')
5859
opera_browser_path = config.get('SNAPSHOTTING', 'opera_browser_path')
60+
wayback_retries_amount = config.get('SNAPSHOTTING', 'wayback_retries')
61+
wayback_requests_pause = config.get('SNAPSHOTTING', 'wayback_req_pause')
62+
html_report_template = config.get('HTML_REPORTING', 'template')
5963

6064

6165
config_values = {
@@ -67,7 +71,10 @@ def read_config():
6771
'user_agents': user_agents,
6872
'proxies_file_path': proxies_file_path,
6973
'installed_browser': installed_browser,
70-
'opera_browser_path': opera_browser_path
74+
'opera_browser_path': opera_browser_path,
75+
'wayback_retries_amount': wayback_retries_amount,
76+
'wayback_requests_pause': wayback_requests_pause,
77+
'template': html_report_template
7178
}
7279

7380
return config_values
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset="UTF-8">
5+
<style>
6+
body { font-family: Arial, sans-serif; line-height: 1.6; }
7+
h3 { text-align: center; margin-top: 20px; }
8+
pre { background: #f8f8f8; padding: 5px; border: 1px solid #ddd; }
9+
.section { margin: 10px 40px; }
10+
</style>
11+
</head>
12+
<body>
13+
14+
<h3>OPEN SOURCE RESEARCH REPORT</h3>
15+
<p><b>Organization:</b> {{org}}</p>
16+
<hr />
17+
18+
<h3>TABLE OF CONTENTS</h3>
19+
<div class="section">
20+
<p>1. General scan information</p>
21+
<p>2. WHOIS information</p>
22+
<p>3. Social medias links</p>
23+
<p>4. Subdomains information</p>
24+
<p>5. DNS & SSL information</p>
25+
<p>6. Services & frameworks</p>
26+
<p>7. Basic pre-pentest information</p>
27+
<p>8. Dorking scan info</p>
28+
<p>9. PageSearch results</p>
29+
<p>10. API scan results</p>
30+
</div>
31+
<hr />
32+
33+
<h3>GENERAL SCAN INFO</h3>
34+
<div class="section">
35+
<p><b>Total subdomains:</b> {{a_tsf}}</p>
36+
<p><b>Social media links:</b> {{a_tsm}}</p>
37+
<pre>Robots.txt: {{robots_txt_result}}
38+
Sitemap.xml: {{sitemap_xml_result}}
39+
Dorking: {{dorking_status}}</pre>
40+
</div>
41+
<hr />
42+
43+
<h3>WHOIS INFORMATION</h3>
44+
<div class="section">
45+
<p><b>Domain:</b> {{sh_domain}} <b>URL:</b> {{full_url}}</p>
46+
<pre>IP: {{ip_address}}
47+
Registrar: {{registrar}}
48+
Dates: {{creation_date}} → {{expiration_date}}</pre>
49+
</div>
50+
<hr />
51+
52+
<h3>SOCIAL MEDIAS</h3>
53+
<div class="section">
54+
<p><b>Facebook:</b></p><pre>{% for l in fb_links %}⇒ {{ l }}{% endfor %}</pre>
55+
<p><b>Twitter/X:</b></p><pre>{% for l in tw_links+xcom_links %}⇒ {{ l }}{% endfor %}</pre>
56+
<p><b>Instagram:</b></p><pre>{% for l in inst_links %}⇒ {{ l }}{% endfor %}</pre>
57+
</div>
58+
<hr />
59+
60+
<h3>SUBDOMAINS</h3>
61+
<div class="section">
62+
<p><b>Found subdomains:</b></p><pre>{% for sd in subdomains %}⇒ {{ sd }}{% endfor %}</pre>
63+
<p><b>IPs:</b></p><pre>{% for sdip in subdomain_ip %}⇒ {{ sdip }}{% endfor %}</pre>
64+
</div>
65+
<hr />
66+
67+
<h3>DNS/SSL</h3>
68+
<div class="section">
69+
<pre>NS: {{name_servers}}
70+
MX: {{mx_records}}
71+
SSL Issuer: {{issuer}}
72+
NotBefore: {{notBefore}}
73+
NotAfter: {{notAfter}}</pre>
74+
</div>
75+
<hr />
76+
77+
<h3>SERVICES</h3>
78+
<div class="section">
79+
<p><b>Web servers:</b></p><pre>{% for ws in web_servers %}⇒ {{ ws }}{% endfor %}</pre>
80+
<p><b>CMS:</b></p><pre>{% for cm in cms %}⇒ {{ cm }}{% endfor %}</pre>
81+
<p><b>Languages:</b></p><pre>{% for pl in programming_languages %}⇒ {{ pl }}{% endfor %}</pre>
82+
</div>
83+
<hr />
84+
85+
<h3>BASIC PRE-PENTEST</h3>
86+
<div class="section">
87+
<p><b>Open ports:</b></p><pre>{% for op in ports %}⇒ {{ op }}{% endfor %}</pre>
88+
<p><b>Vulnerabilities:</b></p><pre>{% for vuln in vulns %}⇒ {{ vuln }}{% endfor %}</pre>
89+
</div>
90+
<hr />
91+
92+
<h3>DORKING SCAN</h3>
93+
<div class="section"><pre>{{ add_dsi | safe }}</pre></div>
94+
<hr />
95+
96+
<h3>PAGESEARCH</h3>
97+
<div class="section">
98+
<pre>Subdomains: {{ps_s}}
99+
Emails: {{ps_e}}
100+
Documents: {{ps_f}}
101+
</pre>
102+
</div>
103+
<hr />
104+
105+
<h3>VIRUSTOTAL</h3>
106+
<div class="section"><pre>{{ virustotal_output }}</pre></div>
107+
<h3>SECURITYTRAILS</h3>
108+
<div class="section"><pre>{{ securitytrails_output }}</pre></div>
109+
<h3>HUDSONROCK</h3>
110+
<div class="section"><pre>{{ hudsonrock_output }}</pre></div>
111+
<hr />
112+
113+
<p style="text-align:center;">Created by DPULSE (OSINT-TECHNOLOGIES)</p>
114+
<p style="text-align:center;">
115+
<a href="https://github.com/OSINT-TECHNOLOGIES">GitHub</a> |
116+
<a href="https://pypi.org/project/dpulse/">PyPI</a>
117+
</p>
118+
119+
</body>
120+
</html>

0 commit comments

Comments
 (0)