Exporting Block Docs
Jump to navigation
Jump to search
This is the Python code from "Exporting Usage Manual", modified to export all of the Block Docs (the one-page-per-block User Documentation).
"""Export wiki pages to text files through the wiki's Special:Export form.

For every title in ``pages_to_save`` the script drives Firefox via Selenium:
it opens Special:Export, types the title into the ``pages`` field, clicks the
"save as file" checkbox, presses Export, and then saves everything from the
first ``<page>`` tag of the resulting page source onward (with HTML entities
unescaped) to ``<directory_name>/Usage Manual- <title>.txt``.
"""
import io
import os
import sys
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

try:
    from html import unescape  # Python 3
except ImportError:
    # Python 2 fallback: same behaviour as the original HTMLParser().unescape
    from HTMLParser import HTMLParser
    unescape = HTMLParser().unescape

# Settings
pages_to_save = [
    'Polymorphic Types (PMTs)',
    'Message Passing',
    'QT GUI',
    'Logging',
    'Performance Counters',
    'Tagged Stream Blocks',
]

# Output directory; a relative path, so it is created under the current
# working directory.
directory_name = 'Usage Manual'

geckodriver_path = '/home/marc/Downloads/geckodriver'
export_url = "https://wiki.gnuradio.org/index.php/Special:Export"


def extract_page_xml(raw_html):
    """Return *raw_html* from its first ``<page>`` tag on, entities unescaped.

    Unescaping makes sequences such as ``&gt;`` show up as a literal
    greater-than sign in the saved text.

    Returns ``None`` when *raw_html* contains no ``<page>`` tag, so the caller
    can skip the page instead of saving a meaningless fragment
    (``str.find`` returns -1 in that case, which would otherwise slice off
    just the last character).
    """
    start_index = raw_html.find('<page>')
    if start_index == -1:
        return None
    return unescape(raw_html[start_index:])


def fetch_export_source(driver, page_name):
    """Submit *page_name* to Special:Export and return the resulting page source.

    NOTE(review): ``find_element_by_xpath`` is kept exactly as the original
    script used it; newer Selenium releases may require
    ``find_element(By.XPATH, ...)`` instead -- confirm against the installed
    version.
    """
    driver.get(export_url)

    # fill in text box
    text_area = driver.find_element_by_xpath("//*[@name='pages']")
    text_area.send_keys(page_name)

    # uncheck "save as file" box
    check_box = driver.find_element_by_xpath("//*[@name='wpDownload']")
    check_box.click()

    # hit Export
    submit_button = driver.find_element_by_xpath("//*[@value='Export']")
    submit_button.click()

    # HTML of the page shown after submitting the form
    return driver.page_source


def main():
    """Export every page in ``pages_to_save`` into ``directory_name``."""
    # make directory if it doesn't exist
    if not os.path.exists(directory_name):
        os.makedirs(directory_name)

    # set up web driver
    driver = webdriver.Firefox(executable_path=geckodriver_path)
    try:
        for page_name in pages_to_save:
            page_text = extract_page_xml(fetch_export_source(driver, page_name))
            if page_text is None:
                sys.stderr.write(
                    "No <page> element found for '%s'; skipping\n" % page_name
                )
                continue

            # save text to file; explicit UTF-8 so non-ASCII wiki text is
            # written the same way regardless of the platform default
            file_name = "Usage Manual- " + page_name + ".txt"
            file_path = os.path.join(directory_name, file_name)
            with io.open(file_path, "w", encoding="utf-8") as text_file:
                text_file.write(page_text)
    finally:
        # always end the WebDriver session, even if an export step failed
        driver.quit()


if __name__ == "__main__":
    main()