Editing Exporting Usage Manual
Warning: You are not logged in. Your IP address will be publicly visible if you make any edits. If you log in or create an account, your edits will be attributed to your username, along with other benefits.
The edit can be undone.
Please check the comparison below to verify that this is what you want to do, and then save the changes below to finish undoing the edit.
Latest revision | Your text | ||
Line 14: | Line 14: | ||
import HTMLParser | import HTMLParser | ||
import os | import os | ||
− | + | ||
categories = ['Usage Manual', 'Block Docs'] # Categories to export; these should match the category names in the wiki | categories = ['Usage Manual', 'Block Docs'] # Categories to export; these should match the category names in the wiki | ||
directory_names = ['Usage Manual', 'Block Docs'] # names of directories that are created for each category, and prefix to file names | directory_names = ['Usage Manual', 'Block Docs'] # names of directories that are created for each category, and prefix to file names | ||
− | + | ||
# set up web driver | # set up web driver | ||
driver = webdriver.Firefox(executable_path='/home/marc/Downloads/geckodriver') | driver = webdriver.Firefox(executable_path='/home/marc/Downloads/geckodriver') | ||
− | + | ||
for i in range(len(categories)): | for i in range(len(categories)): | ||
# make directory if it doesn't exist | # make directory if it doesn't exist | ||
if not os.path.exists(directory_names[i]): # will be in the same location as this script | if not os.path.exists(directory_names[i]): # will be in the same location as this script | ||
os.makedirs(directory_names[i]) | os.makedirs(directory_names[i]) | ||
− | + | ||
# Go to the wiki's export page | # Go to the wiki's export page | ||
driver.get("https://wiki.gnuradio.org/index.php/Special:Export") | driver.get("https://wiki.gnuradio.org/index.php/Special:Export") | ||
− | + | ||
# fill in text box | # fill in text box | ||
text_area = driver.find_element_by_xpath("//*[@name='catname']") | text_area = driver.find_element_by_xpath("//*[@name='catname']") | ||
text_area.send_keys(categories[i]) | text_area.send_keys(categories[i]) | ||
− | + | ||
# Hit Add button | # Hit Add button | ||
submit_button = driver.find_element_by_xpath("//*[@value='Add']") | submit_button = driver.find_element_by_xpath("//*[@value='Add']") | ||
submit_button.click() | submit_button.click() | ||
− | + | ||
# uncheck "save as file" box | # uncheck "save as file" box | ||
check_box = driver.find_element_by_xpath("//*[@name='wpDownload']") | check_box = driver.find_element_by_xpath("//*[@name='wpDownload']") | ||
check_box.click() | check_box.click() | ||
− | + | ||
# hit Export | # hit Export | ||
submit_button = driver.find_element_by_xpath("//*[@value='Export']") | submit_button = driver.find_element_by_xpath("//*[@value='Export']") | ||
submit_button.click() | submit_button.click() | ||
− | + | ||
# get HTML of new page | # get HTML of new page | ||
raw_html = driver.page_source | raw_html = driver.page_source | ||
start_index = raw_html.find('<page>') | start_index = raw_html.find('<page>') | ||
cropped_html = raw_html[start_index:] | cropped_html = raw_html[start_index:] | ||
− | + | ||
while True: | while True: | ||
indx1 = cropped_html.find('<title>') | indx1 = cropped_html.find('<title>') | ||
indx2 = cropped_html.find('</title>') | indx2 = cropped_html.find('</title>') | ||
− | + | ||
# check if we are done | # check if we are done | ||
if (indx2 - indx1) == 0: # happens when no <title> is left: both find() calls return -1, so the difference is 0 | if (indx2 - indx1) == 0: # happens when no <title> is left: both find() calls return -1, so the difference is 0 | ||
break | break | ||
− | + | ||
title = cropped_html[indx1 + 7 : indx2] | title = cropped_html[indx1 + 7 : indx2] | ||
indx3 = cropped_html.find('[[Category:' + categories[i] + ']]') # using this instead of <text> because <text> has different numbers each time | indx3 = cropped_html.find('[[Category:' + categories[i] + ']]') # using this instead of <text> because <text> has different numbers each time | ||
indx4 = cropped_html.find('</text>') | indx4 = cropped_html.find('</text>') | ||
body = cropped_html[indx3 + len(categories[i]) + 13 : indx4] # | body = cropped_html[indx3 + len(categories[i]) + 13 : indx4] # | ||
− | + | ||
# save body to file | # save body to file | ||
h = HTMLParser.HTMLParser() | h = HTMLParser.HTMLParser() | ||
Line 74: | Line 74: | ||
# remove the page we just saved | # remove the page we just saved | ||
cropped_html = cropped_html[indx2 + 8:] # doesn't really matter how much you add here, it just needs to move on to the next <title> | cropped_html = cropped_html[indx2 + 8:] # doesn't really matter how much you add here, it just needs to move on to the next <title> | ||
− | + | ||
driver.quit() # closes window | driver.quit() # closes window |