Editing Exporting Usage Manual

Jump to: navigation, search

Warning: You are not logged in. Your IP address will be publicly visible if you make any edits. If you log in or create an account, your edits will be attributed to your username, along with other benefits.

The edit can be undone. Please check the comparison below to verify that this is what you want to do, and then save the changes below to finish undoing the edit.
Latest revision | Your text
Line 14: Line 14:
 
     import HTMLParser  
 
     import HTMLParser  
 
     import os
 
     import os
   
+
 
 
     categories = ['Usage Manual', 'Block Docs'] # Categories which to export, should match the names in wiki
 
     categories = ['Usage Manual', 'Block Docs'] # Categories which to export, should match the names in wiki
 
     directory_names = ['Usage Manual', 'Block Docs'] # names of directories that are created for each category, and prefix to file names
 
     directory_names = ['Usage Manual', 'Block Docs'] # names of directories that are created for each category, and prefix to file names
   
+
 
 
     # set up web driver
 
     # set up web driver
 
     driver = webdriver.Firefox(executable_path='/home/marc/Downloads/geckodriver')
 
     driver = webdriver.Firefox(executable_path='/home/marc/Downloads/geckodriver')
       
+
       
 
     for i in range(len(categories)):
 
     for i in range(len(categories)):
 
         # make directory if it doesn't exist
 
         # make directory if it doesn't exist
 
         if not os.path.exists(directory_names[i]): # will be in the same location as this script
 
         if not os.path.exists(directory_names[i]): # will be in the same location as this script
 
             os.makedirs(directory_names[i])
 
             os.makedirs(directory_names[i])
   
+
 
 
         # Go to the wiki's export page
 
         # Go to the wiki's export page
 
         driver.get("https://wiki.gnuradio.org/index.php/Special:Export")
 
         driver.get("https://wiki.gnuradio.org/index.php/Special:Export")
   
+
 
 
         # fill in text box
 
         # fill in text box
 
         text_area = driver.find_element_by_xpath("//*[@name='catname']")
 
         text_area = driver.find_element_by_xpath("//*[@name='catname']")
 
         text_area.send_keys(categories[i])
 
         text_area.send_keys(categories[i])
   
+
 
 
         # Hit Add button
 
         # Hit Add button
 
         submit_button = driver.find_element_by_xpath("//*[@value='Add']")
 
         submit_button = driver.find_element_by_xpath("//*[@value='Add']")
 
         submit_button.click()  
 
         submit_button.click()  
   
+
 
 
         # uncheck "save as file" box
 
         # uncheck "save as file" box
 
         check_box = driver.find_element_by_xpath("//*[@name='wpDownload']")
 
         check_box = driver.find_element_by_xpath("//*[@name='wpDownload']")
 
         check_box.click()
 
         check_box.click()
   
+
 
 
         # hit Export
 
         # hit Export
 
         submit_button = driver.find_element_by_xpath("//*[@value='Export']")
 
         submit_button = driver.find_element_by_xpath("//*[@value='Export']")
 
         submit_button.click()  
 
         submit_button.click()  
   
+
 
 
         # get HTML of new page
 
         # get HTML of new page
 
         raw_html = driver.page_source
 
         raw_html = driver.page_source
 
         start_index = raw_html.find('<page>')
 
         start_index = raw_html.find('<page>')
 
         cropped_html = raw_html[start_index:]
 
         cropped_html = raw_html[start_index:]
   
+
 
 
         while True:
 
         while True:
 
             indx1 = cropped_html.find('<title>')
 
             indx1 = cropped_html.find('<title>')
 
             indx2 = cropped_html.find('</title>')
 
             indx2 = cropped_html.find('</title>')
           
+
           
 
             # check if we are done
 
             # check if we are done
 
             if (indx2 - indx1) == 0: # happens when it doesnt find a <title>
 
             if (indx2 - indx1) == 0: # happens when it doesnt find a <title>
 
                 break
 
                 break
               
+
               
 
             title = cropped_html[indx1 + 7 : indx2]
 
             title = cropped_html[indx1 + 7 : indx2]
 
             indx3 = cropped_html.find('[[Category:' + categories[i] + ']]') # using this instead of <text> because <text> has different numbers each time
 
             indx3 = cropped_html.find('[[Category:' + categories[i] + ']]') # using this instead of <text> because <text> has different numbers each time
 
             indx4 = cropped_html.find('</text>')
 
             indx4 = cropped_html.find('</text>')
 
             body = cropped_html[indx3 + len(categories[i]) + 13 : indx4] #  
 
             body = cropped_html[indx3 + len(categories[i]) + 13 : indx4] #  
           
+
           
 
             # save body to file
 
             # save body to file
 
             h = HTMLParser.HTMLParser()
 
             h = HTMLParser.HTMLParser()
Line 74: Line 74:
 
             # remove the page we just saved
 
             # remove the page we just saved
 
             cropped_html = cropped_html[indx2 + 8:] # doesnt really matter how much you add here, just needs to move on to the next <title>
 
             cropped_html = cropped_html[indx2 + 8:] # doesnt really matter how much you add here, just needs to move on to the next <title>
   
+
 
 
     driver.quit() # closes window
 
     driver.quit() # closes window

Please note that all contributions to GNU Radio are considered to be released under the Creative Commons Attribution-ShareAlike license (see GNU Radio:Copyrights for details). If you do not want your writing to be edited mercilessly and redistributed at will, then do not submit it here.
You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource. Do not submit copyrighted work without permission!

To edit this page, please answer the question that appears below (more info):

Cancel | Editing help (opens in new window)