Exporting Usage Manual: Difference between revisions
(Created page with "One of the issues people raised with moving the non-block/function/class specific documentation from Doxygen to wiki is that people won't have a local copy of the Usage Manual...") |
No edit summary |
||
(10 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
One of the issues people raised with moving the non-block/function/class specific documentation from Doxygen to wiki is that people won't have a local copy of the Usage Manual anymore. To solve this I put together a little script that exports a list of pages on this wiki to the raw source which looks almost exactly the same as the doxygen source. This script just has to be run a few times a year and then the resulting files committed to gnuradio git (the location of these files within the git repo has yet to be determined). | One of the issues people raised with moving the non-block/function/class specific documentation from Doxygen to this wiki is that people won't have a local copy of the Usage Manual anymore. To solve this I put together a little script that exports a list of pages on this wiki to the raw source which looks almost exactly the same as the doxygen source. This script just has to be run a few times a year and then the resulting files committed to gnuradio git (the location of these files within the git repo has yet to be determined). | ||
from selenium import webdriver | To use this script, you will have to download geckodriver and change the line that points to where it is located. After running it there should be a new directory called "Usage Manual" created in whatever directory you ran the script from, containing a bunch of text files. | ||
from selenium.webdriver.common.keys import Keys | |||
# Download latest version of geckodriver from [https://github.com/mozilla/geckodriver/releases here] and extract it to a known location, then modify the line "driver = webdriver.Firefox(..." below (I've had the best luck with v0.21) | |||
# sudo pip install selenium HTMLParser | |||
# Make sure the two versions match, see [https://firefox-source-docs.mozilla.org/testing/geckodriver/geckodriver/Support.html this table] | |||
Note that this script now also exports the Blocks Docs pages | |||
"""Export GNU Radio wiki categories to local text files.

For every category in ``categories`` the script drives Firefox (through
geckodriver) to the wiki's Special:Export page, requests the raw export of
all pages in that category, and writes one ``<prefix>- <title>.txt`` file
per page into the matching directory from ``directory_names``.

Before running, point ``GECKODRIVER_PATH`` at your geckodriver binary.
"""
import io
import os
import time  # retained from the original script; currently unused

try:
    from html import unescape  # Python 3.4+
except ImportError:  # Python 2 fallback
    from HTMLParser import HTMLParser
    unescape = HTMLParser().unescape

# Categories to export; must match the category names on the wiki.
categories = ['Usage Manual', 'Block Docs']
# Directory created for each category; also used as the file-name prefix.
directory_names = ['Usage Manual', 'Block Docs']

EXPORT_URL = "https://wiki.gnuradio.org/index.php/Special:Export"
GECKODRIVER_PATH = '/home/marc/Downloads/geckodriver'


def extract_pages(export_xml, category):
    """Return ``[(title, body), ...]`` for each ``<page>`` in an export dump.

    ``export_xml`` is the source of the Special:Export result page and
    ``category`` is the wiki category that was exported.  The first
    ``[[Category:<category>]]`` tag is removed from each body and HTML
    entities (``&gt;``, ``&amp;``, ...) in titles and bodies are decoded.
    Pages without a ``<title>`` are skipped; pages without text yield ``''``.
    """
    category_tag = '[[Category:' + category + ']]'
    pages = []
    page_start = export_xml.find('<page>')
    while page_start != -1:
        # Confine every search to the current page so a missing element can
        # never be satisfied by a match that belongs to a later page.
        page_end = export_xml.find('</page>', page_start)
        if page_end == -1:
            page_end = len(export_xml)
        page = export_xml[page_start:page_end]
        page_start = export_xml.find('<page>', page_end)

        title_open = page.find('<title>')
        title_close = page.find('</title>')
        if title_open == -1 or title_close == -1:
            continue
        title = unescape(page[title_open + len('<title>'):title_close])

        body = ''
        text_open = page.find('<text')
        if text_open != -1:
            # The <text ...> attributes vary per page, so skip to the '>'.
            tag_close = page.find('>', text_open)
            text_close = page.find('</text>', tag_close)
            self_closing = tag_close > 0 and page[tag_close - 1] == '/'
            if tag_close != -1 and text_close != -1 and not self_closing:
                body = page[tag_close + 1:text_close]
        # Drop the category tag wherever it sits instead of discarding
        # everything that precedes it.
        body = body.replace(category_tag, '', 1)
        pages.append((title, unescape(body)))
    return pages


def build_file_name(prefix, title):
    """Return ``'<prefix>- <title>.txt'`` with path separators made safe."""
    for separator in (os.sep, os.altsep):
        if separator:
            # A '/' in a wiki title would otherwise be read as a directory.
            title = title.replace(separator, '_')
    return prefix + "- " + title + ".txt"


def write_page(directory, title, body):
    """Write one exported page to ``directory`` as UTF-8 text."""
    path = os.path.join(directory, build_file_name(directory, title))
    with io.open(path, "w", encoding="utf-8") as text_file:
        text_file.write(u'===' + title + u'===\n')
        text_file.write(body)


def export_category(driver, category, directory):
    """Export every page of ``category`` into ``directory`` using ``driver``."""
    # The directory is created relative to the current working directory.
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Go to the wiki's export page
    driver.get(EXPORT_URL)
    # NOTE(review): find_element_by_xpath is the Selenium 3 API used by the
    # original script; newer Selenium releases need find_element(By.XPATH, ...).
    # fill in the category text box and hit Add
    driver.find_element_by_xpath("//*[@name='catname']").send_keys(category)
    driver.find_element_by_xpath("//*[@value='Add']").click()
    # uncheck "save as file" box so the export is shown in the browser
    driver.find_element_by_xpath("//*[@name='wpDownload']").click()
    # hit Export
    driver.find_element_by_xpath("//*[@value='Export']").click()

    for title, body in extract_pages(driver.page_source, category):
        write_page(directory, title, body)


def main():
    """Export all configured categories, always closing the browser."""
    # Imported here so the parsing helpers stay usable without Selenium.
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys  # noqa: F401 (kept from original)

    # set up web driver
    driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH)
    try:
        for category, directory in zip(categories, directory_names):
            export_category(driver, category, directory)
    finally:
        driver.quit()  # closes window, even if an export failed


if __name__ == "__main__":
    main()
Latest revision as of 03:28, 13 March 2019
One of the issues people raised with moving the non-block/function/class specific documentation from Doxygen to this wiki is that people won't have a local copy of the Usage Manual anymore. To solve this I put together a little script that exports a list of pages on this wiki to the raw source which looks almost exactly the same as the doxygen source. This script just has to be run a few times a year and then the resulting files committed to gnuradio git (the location of these files within the git repo has yet to be determined).
To use this script, you will have to download geckodriver and change the line that points to where it is located. After running it there should be new directories called "Usage Manual" and "Block Docs" created in whatever directory you ran the script from, each containing a bunch of text files.
- Download latest version of geckodriver from here and extract it to a known location, then modify the line "driver = webdriver.Firefox(..." below (I've had the best luck with v0.21)
- sudo pip install selenium HTMLParser
- Make sure your geckodriver version is compatible with your Firefox version, see this table
Note that this script now also exports the Block Docs pages.
"""Export GNU Radio wiki categories to local text files.

For every category in ``categories`` the script drives Firefox (through
geckodriver) to the wiki's Special:Export page, requests the raw export of
all pages in that category, and writes one ``<prefix>- <title>.txt`` file
per page into the matching directory from ``directory_names``.

Before running, point ``GECKODRIVER_PATH`` at your geckodriver binary.
"""
import io
import os
import time  # retained from the original script; currently unused

try:
    from html import unescape  # Python 3.4+
except ImportError:  # Python 2 fallback
    from HTMLParser import HTMLParser
    unescape = HTMLParser().unescape

# Categories to export; must match the category names on the wiki.
categories = ['Usage Manual', 'Block Docs']
# Directory created for each category; also used as the file-name prefix.
directory_names = ['Usage Manual', 'Block Docs']

EXPORT_URL = "https://wiki.gnuradio.org/index.php/Special:Export"
GECKODRIVER_PATH = '/home/marc/Downloads/geckodriver'


def extract_pages(export_xml, category):
    """Return ``[(title, body), ...]`` for each ``<page>`` in an export dump.

    ``export_xml`` is the source of the Special:Export result page and
    ``category`` is the wiki category that was exported.  The first
    ``[[Category:<category>]]`` tag is removed from each body and HTML
    entities (``&gt;``, ``&amp;``, ...) in titles and bodies are decoded.
    Pages without a ``<title>`` are skipped; pages without text yield ``''``.
    """
    category_tag = '[[Category:' + category + ']]'
    pages = []
    page_start = export_xml.find('<page>')
    while page_start != -1:
        # Confine every search to the current page so a missing element can
        # never be satisfied by a match that belongs to a later page.
        page_end = export_xml.find('</page>', page_start)
        if page_end == -1:
            page_end = len(export_xml)
        page = export_xml[page_start:page_end]
        page_start = export_xml.find('<page>', page_end)

        title_open = page.find('<title>')
        title_close = page.find('</title>')
        if title_open == -1 or title_close == -1:
            continue
        title = unescape(page[title_open + len('<title>'):title_close])

        body = ''
        text_open = page.find('<text')
        if text_open != -1:
            # The <text ...> attributes vary per page, so skip to the '>'.
            tag_close = page.find('>', text_open)
            text_close = page.find('</text>', tag_close)
            self_closing = tag_close > 0 and page[tag_close - 1] == '/'
            if tag_close != -1 and text_close != -1 and not self_closing:
                body = page[tag_close + 1:text_close]
        # Drop the category tag wherever it sits instead of discarding
        # everything that precedes it.
        body = body.replace(category_tag, '', 1)
        pages.append((title, unescape(body)))
    return pages


def build_file_name(prefix, title):
    """Return ``'<prefix>- <title>.txt'`` with path separators made safe."""
    for separator in (os.sep, os.altsep):
        if separator:
            # A '/' in a wiki title would otherwise be read as a directory.
            title = title.replace(separator, '_')
    return prefix + "- " + title + ".txt"


def write_page(directory, title, body):
    """Write one exported page to ``directory`` as UTF-8 text."""
    path = os.path.join(directory, build_file_name(directory, title))
    with io.open(path, "w", encoding="utf-8") as text_file:
        text_file.write(u'===' + title + u'===\n')
        text_file.write(body)


def export_category(driver, category, directory):
    """Export every page of ``category`` into ``directory`` using ``driver``."""
    # The directory is created relative to the current working directory.
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Go to the wiki's export page
    driver.get(EXPORT_URL)
    # NOTE(review): find_element_by_xpath is the Selenium 3 API used by the
    # original script; newer Selenium releases need find_element(By.XPATH, ...).
    # fill in the category text box and hit Add
    driver.find_element_by_xpath("//*[@name='catname']").send_keys(category)
    driver.find_element_by_xpath("//*[@value='Add']").click()
    # uncheck "save as file" box so the export is shown in the browser
    driver.find_element_by_xpath("//*[@name='wpDownload']").click()
    # hit Export
    driver.find_element_by_xpath("//*[@value='Export']").click()

    for title, body in extract_pages(driver.page_source, category):
        write_page(directory, title, body)


def main():
    """Export all configured categories, always closing the browser."""
    # Imported here so the parsing helpers stay usable without Selenium.
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys  # noqa: F401 (kept from original)

    # set up web driver
    driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH)
    try:
        for category, directory in zip(categories, directory_names):
            export_category(driver, category, directory)
    finally:
        driver.quit()  # closes window, even if an export failed


if __name__ == "__main__":
    main()