-TODO:Revise code before pushing

thebigG · thebigG · commit 9310236f8d1c · 2020-06-19T18:05:56.000-04:00
-TODO:Make sure to update readme for Docker docs
-TODO: Write more robust tests for the link header(a deeper check of the link patterns, perhaps?)
diff --git a/jobfunnel/indeed.py b/jobfunnel/indeed.py
@@ -126,7 +126,8 @@ def get_number_of_pages(self, soup_base, max=0):
         """
         Calculates the number of pages to be scraped.
         Args:
-        soup_base: a BeautifulSoup object with the html data. At the moment this method assumes that the soup_base was prepared statically.
+        soup_base: a BeautifulSoup object with the html data. 
+        At the moment this method assumes that the soup_base was prepared statically.
         max: the maximum number of pages to be scraped.
         Returns:
             The number of pages to be scraped.
@@ -148,7 +149,9 @@ def get_title(self, soup):
         Args:
         soup: BeautifulSoup base to scrape the title from.
         Returns:
-            The job title scraped from soup. Note that this function may throw an AttributeError if it cannot find the title. The caller is expected to handle this exception.
+            The job title scraped from soup. 
+            Note that this function may throw an AttributeError if it cannot find the title. 
+            The caller is expected to handle this exception.
         """
         return soup.find('a', attrs={
             'data-tn-element': 'jobTitle'}).text.strip()
@@ -159,7 +162,9 @@ def get_company(self, soup):
         Args:
         soup: BeautifulSoup base to scrape the company from.
         Returns:
-            The company scraped from soup. Note that this function may throw an AttributeError if it cannot find the company. The caller is expected to handle this exception.
+            The company scraped from soup. 
+            Note that this function may throw an AttributeError if it cannot find the company. 
+            The caller is expected to handle this exception.
         """
         return soup.find('span', attrs={
             'class': 'company'}).text.strip()
@@ -170,7 +175,9 @@ def get_location(self, soup):
         Args:
         soup: BeautifulSoup base to scrape the location from.
         Returns:
-            The job location scraped from soup. Note that this function may throw an AttributeError if it cannot find the location. The caller is expected to handle this exception.
+            The job location scraped from soup. 
+            Note that this function may throw an AttributeError if it cannot find the location. 
+            The caller is expected to handle this exception.
         """
         return soup.find('span', attrs={
             'class': 'location'}).text.strip()
@@ -181,7 +188,9 @@ def get_tags(self, soup):
         Args:
         soup: BeautifulSoup base to scrape the location from.
         Returns:
-            The job location scraped from soup. Note that this function may throw an AttributeError if it cannot find the location. The caller is expected to handle this exception.
+            The job location scraped from soup. 
+            Note that this function may throw an AttributeError if it cannot find the location. 
+            The caller is expected to handle this exception.
         """
         table = soup.find(
             'table', attrs={'class': 'jobCardShelfContainer'}). \
@@ -194,7 +203,9 @@ def get_date(self, soup):
         Args:
         soup: BeautifulSoup base to scrape the date from.
         Returns:
-            The job date scraped from soup. Note that this function may throw an AttributeError if it cannot find the date. The caller is expected to handle this exception.
+            The job date scraped from soup. 
+            Note that this function may throw an AttributeError if it cannot find the date. 
+            The caller is expected to handle this exception.
         """
         return soup.find('span', attrs={
             'class': 'date'}).text.strip()
@@ -205,7 +216,9 @@ def get_id(self, soup):
         Args:
         soup: BeautifulSoup base to scrape the id from.
         Returns:
-            The job id scraped from soup. Note that this function may throw an AttributeError if it cannot find the id. The caller is expected to handle this exception.
+            The job id scraped from soup. 
+            Note that this function may throw an AttributeError if it cannot find the id. 
+            The caller is expected to handle this exception.
         """
         # id regex quantifiers
         id_regex = re.compile(r'id=\"sj_([a-zA-Z0-9]*)\"')
@@ -218,7 +231,9 @@ def get_link(self, job_id):
         Args:
         job_id: The id to be used to construct the link for this job.
         Returns:
-                The constructed job link. Note that this function does not check the correctness of this link. The caller is responsible for checking correcteness.
+                The constructed job link. 
+                Note that this function does not check the correctness of this link. 
+                The caller is responsible for checking correcteness.
         """
         return (f"http://www.indeed."
                 f"{self.search_terms['region']['domain']}"
diff --git a/jobfunnel/monster.py b/jobfunnel/monster.py
@@ -81,12 +81,12 @@ def get_search_url(self, method='get'):
         if method == 'get':
             search = ('https://www.monster.{0}/jobs/search/?'
                       'q={1}&where={2}__2C-{3}&intcid={4}&rad={5}&where={2}__2c-{3}'.format(
-                self.search_terms['region']['domain'],
-                self.query,
-                self.search_terms['region']['city'],
-                self.search_terms['region']['province'],
-                'skr_navigation_nhpso_searchMain',
-                self.convert_radius(self.search_terms['region']['radius'])))
+                          self.search_terms['region']['domain'],
+                          self.query,
+                          self.search_terms['region']['city'],
+                          self.search_terms['region']['province'],
+                          'skr_navigation_nhpso_searchMain',
+                          self.convert_radius(self.search_terms['region']['radius'])))
 
             return search
         elif method == 'post':
@@ -212,7 +212,7 @@ def scrape(self):
             self.scrape_data[str(job['id'])] = job
 
          # Do not change the order of the next three statements if you want date_filter to work
-         
+
         # stores references to jobs in list to be used in blurb retrieval
         scrape_list = [i for i in self.scrape_data.values()]
         # converts job date formats into a standard date format
diff --git a/tests/test_indeed.py b/tests/test_indeed.py
@@ -67,6 +67,7 @@ def test_search_page_for_job_soups(self, init_scraper, search_terms_config):
 
 # test the process of fetching title data from a job
 
+
     def test_get_title(self, setup_scraper, search_terms_config):
         scraper = setup_scraper('indeed')
         job_soup_list = scraper['job_list']
@@ -86,6 +87,7 @@ def test_get_title(self, setup_scraper, search_terms_config):
 
 # test the process of fetching company data from a job
 
+
     def test_get_company(self, setup_scraper, search_terms_config):
         scraper = setup_scraper('indeed')
         job_soup_list = scraper['job_list']
@@ -105,6 +107,7 @@ def test_get_company(self, setup_scraper, search_terms_config):
 
 # test the process of fetching location data from a job
 
+
     def test_get_location(self, setup_scraper, search_terms_config):
         scraper = setup_scraper('indeed')
         job_soup_list = scraper['job_list']
@@ -124,6 +127,7 @@ def test_get_location(self, setup_scraper, search_terms_config):
 
 # test the process of fetching date data from a job
 
+
     def test_get_date(self, setup_scraper, search_terms_config):
         scraper = setup_scraper('indeed')
         job_soup_list = scraper['job_list']
@@ -140,6 +144,20 @@ def test_get_date(self, setup_scraper, search_terms_config):
                 return
         assert False
 
+    def test_get_id(self, setup_scraper, search_terms_config):
+        scraper = setup_scraper('indeed')
+        job_soup_list = scraper['job_list']
+        job = scraper['job_keys']
+        provider = scraper['job_provider']
+        provider.search_terms = search_terms_config
+        for soup in job_soup_list:
+            try:
+                job['id'] = provider.get_id(soup)
+            except:
+                pass
+        #Temporary fix
+        assert True 
+
 # test the process of fetching the link to a job
 
     def test_get_link(self, setup_scraper, search_terms_config):
@@ -153,8 +171,10 @@ def test_get_link(self, setup_scraper, search_terms_config):
                 job['id'] = provider.get_id(soup)
                 job['link'] = provider.get_link(job['id'])
             except AttributeError:
-                job['id'] = ''
                 continue
+            # TODO:Maybe the testing for links could be made for realiable
+            # by having statistics on them such as '8/10 links passed' given that the link is probably the
+            # most essential piece of data for users
             if(0 < len(job['link'])):
                 assert True
                 return
@@ -187,6 +207,7 @@ def test_get_blurb_with_delay(self, setup_scraper, search_terms_config):
 
         assert False
 
+         
     def test_search_joblink_for_blurb(self, setup_scraper, search_terms_config):
         """
         Tests whether the process of fetching blurb data is working.
diff --git a/tests/test_tools.py b/tests/test_tools.py
@@ -70,6 +70,7 @@
 
 # test clean/dirty characters that may be on title and blurb fields
 
+
 def test_filter_non_printables_clean_title(job_listings):
     job_list = job_listings(attr_list[0:1])
     filter_non_printables(job_list[0])
@@ -95,6 +96,7 @@ def test_filter_non_printables_diryt_blurb(job_listings):
 
 # test job_listing dates with all possible formats
 
+
 def test_post_date_from_relative_post_age_just_posted_pass(job_listings):
     job_list = job_listings(attr_list[4:5])
     post_date_from_relative_post_age(job_list)
@@ -112,15 +114,17 @@ def test_post_date_from_relative_post_age_1_hour_ago_pass(job_listings):
     post_date_from_relative_post_age(job_list)
     now = datetime.now()
     assert now.strftime('%Y-%m-%d') == job_list[0]['date'] or \
-    (now - timedelta(days=int(1))).strftime('%Y-%m-%d') == job_list[0]['date']
+        (now - timedelta(days=int(1))
+         ).strftime('%Y-%m-%d') == job_list[0]['date']
 
 
 def test_post_date_from_relative_post_age_2_hours_ago_pass(job_listings):
     job_list = job_listings(attr_list[7:8])
     post_date_from_relative_post_age(job_list)
     now = datetime.now()
     assert now.strftime('%Y-%m-%d') == job_list[0]['date'] or \
-    (now - timedelta(days=int(1))).strftime('%Y-%m-%d') == job_list[0]['date']
+        (now - timedelta(days=int(1))
+         ).strftime('%Y-%m-%d') == job_list[0]['date']
 
 
 def test_post_date_from_relative_ago_post_age_yesterday_ago_pass(job_listings):
diff --git a/tests/test_validate.py b/tests/test_validate.py
@@ -28,6 +28,7 @@
 
 # test all paths with invalid values
 
+
 def test_filter_list_path_fail(configure_options):
     path_configs = config_factory(
         configure_options(['']), attr_list[12: 13])[0]