Upload
phunghanh
View
213
Download
0
Embed Size (px)
Citation preview
LSA
September 19, 2018
1 Latent Semantic Analyser (Unsupervised Learning)
1.1 An easy way for reducing dimensionality of the data
1.1.1 Importing Libraries
In [1]: import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
from sklearn.linear_model import LogisticRegression
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from sklearn.decomposition import TruncatedSVD
import pandas as pd
1.1.2 Reading data files and adding more stopwords
# In [2]:
wordnet_lemmatiser = WordNetLemmatizer()

# Read both input files inside `with` blocks so the file handles are closed
# as soon as the data has been loaded instead of being left open.
with open("all_book_titles.txt") as titles_file:
    # One book title per line; trailing whitespace/newline stripped.
    titles = [line.rstrip() for line in titles_file]

with open("stopwords.txt") as stopwords_file:
    # One stop word per line.
    stop_words = {w.rstrip() for w in stopwords_file}

# Extra stop words added on top of the ones read from stopwords.txt.
stop_words = stop_words.union({
    'introduction', 'edition', 'series', 'application',
    'approach', 'card', 'access', 'package', 'plus', 'etext',
    'brief', 'vol', 'fundamental', 'guide', 'essential', 'printed',
    'third', 'second', 'fourth', 'and', 'the'})
1.1.3 A custom tokeniser to process every title and extract the base (lemma) form of each word in it
# In [3]:
def my_tokeniser(s):
    """Turn a string into a list of cleaned, lemmatised tokens.

    The text is lower-cased and split with NLTK's ``word_tokenize``. A raw
    token is kept only if it is longer than two characters; it is then
    lemmatised with ``wordnet_lemmatiser``, and the resulting lemma is
    dropped if it is in ``stop_words`` or contains any digit.

    Returns the surviving lemmas in their original order.
    """
    kept = []
    for raw in nltk.tokenize.word_tokenize(s.lower()):
        # The length filter applies to the raw token, before lemmatising.
        if len(raw) <= 2:
            continue
        lemma = wordnet_lemmatiser.lemmatize(raw)
        # Stop-word and digit filters apply to the lemma.
        if lemma in stop_words:
            continue
        if any(ch.isdigit() for ch in lemma):
            continue
        kept.append(lemma)
    return kept
1
# sample_string = "This is a test sentence to check how the custom tokeniser is working."
# my_tokens = my_tokeniser(sample_string)
# my_tokens
1.1.4 Preparing a word index mapper which maps every unique word to its own index (its row in the term-document matrix)
For every title, we break it into a list of tokens and then for each token we check whether it exists in the word-to-index mapper or not. If it exists we skip it; otherwise we store the word in the mapper, using the current size of the mapper as its index. The word index mapper is required so that we can pass a token to it and get back the position in the matrix where we can assign a vector value for that word.
# In [4]:
word_index_map = {}    # token -> integer index, assigned in order of first appearance
current_index = 0      # next index to hand out; equals len(word_index_map)
all_tokens = []        # token list of every successfully processed title
all_titles = []        # cleaned title text, kept parallel to all_tokens
index_word_map = []    # inverse of word_index_map: index -> token
error_count = 0        # number of titles that failed to process
for title in titles:
    try:
        # Drop every non-ASCII character. With errors='ignore' the encode
        # step discards unencodable characters rather than raising, so any
        # exception caught below comes from tokenising.
        title = title.encode('ascii', 'ignore').decode('utf-8')
        tokens = my_tokeniser(title)
    except Exception as e:
        # Best effort: report the problem title and carry on with the rest.
        print(e)
        print(title)
        error_count += 1
    else:
        # Record the title only once tokenising has succeeded, so that
        # all_titles[k] always corresponds to all_tokens[k].
        all_titles.append(title)
        all_tokens.append(tokens)
        for token in tokens:
            if token not in word_index_map:
                word_index_map[token] = current_index
                current_index += 1
                index_word_map.append(token)
In [5]: word_index_map["reader"]
Out[5]: 3
# In [6]:
def tokens_to_vector(tokens):
    """Build a 0/1 indicator vector for one list of tokens.

    The vector has one entry per key of ``word_index_map``. Entries at the
    indices of the given tokens are set to 1 and all others stay 0; a token
    that occurs more than once still yields 1. A token that is missing from
    ``word_index_map`` raises ``KeyError``.
    """
    indicator = np.zeros(len(word_index_map))
    # Look up every token's index first, then set all of them in one go.
    positions = np.fromiter(
        (word_index_map[tok] for tok in tokens), dtype=np.intp
    )
    indicator[positions] = 1
    return indicator
# In [7]:
# Preparing a D x N matrix: one row per unique token, one column per title
N = len(all_tokens)       # N = total number of titles processed
D = len(word_index_map)   # D = total number of unique tokens extracted from the titles
X = np.zeros((D, N))      # start from an all-zero matrix

# In [8]:
# Column i of X is the indicator vector of the i-th tokenised title.
for i, tokens in enumerate(all_tokens):
    X[:, i] = tokens_to_vector(tokens)
In [9]: all_tokens
Out[9]: [['philosophy', 'sex', 'love', 'reader'],
['reading', 'judaism', 'christianity', 'islam'],
['microprocessor', 'principle'],
['bernhard', 'edouard', 'fernow', 'story', 'north', 'american', 'forestry'],
['encyclopedia', 'buddhism'],
['motorola', 'microprocessor', 'family', 'programming', 'interfacing'],
['american', 'anthem', 'student', 'modern', 'era'],
['read',
'literature',
'professor',
'lively',
'entertaining',
'reading',
'line'],
['mar',
'woman',
'venus',
'secret',
'sex',
'improving',
'communication',
'lasting',
'intimacy',
'fulfillment',
'giving',
'receiving',
'love',
'secret',
'passion',
'understanding',
'martian'],
['religious',
'tradition',
'world',
'journey',
'africa',
'mesoamerica',
3
'north',
'america',
'judaism',
'christianity',
'islam',
'hinduism',
'buddhism',
'china'],
['world', 'wisdom', 'sacred', 'text', 'world', 'religion'],
['illustrated', 'world', 'religion', 'wisdom', 'tradition'],
['soul', 'sex', 'cultivating', 'life', 'act', 'love'],
['thriving', 'chaos', 'handbook', 'management', 'revolution'],
['blood', 'relative'],
['wheelock', 'latin'],
['choice', 'uncertainty', 'chaos', 'luck', 'thrive', 'despite'],
['expanding', 'discourse', 'feminism', 'art', 'history'],
['relativity', 'pure', 'applied', 'physic'],
['experiment', 'microprocessor', 'interfacing', 'programming', 'hardware'],
['pathophysiology', 'concept', 'health', 'care', 'professional'],
['power', 'system', 'operation'],
['machine', 'learning'],
['e-commerce', 'strategy', 'technology'],
['real', 'complex', 'analysis'],
['schaum', 'outline', 'microprocessor'],
['schaum', 'microprocessor', 'roger', 'tokheim', 'paperback'],
['probability', 'random', 'variable', 'stochastic', 'process'],
['medical', 'microbiology', 'immunology', 'picture'],
['security', 'analysis', 'sixth', 'foreword', 'warren', 'buffett'],
['goodman',
'gilman',
'pharmacological',
'basis',
'therapeutic',
'twelfth',
'goodman',
'gilman',
'pharmacological',
'basis',
'therapeutic'],
['oca/ocp',
'oracle',
'database',
'all-in-one',
'exam',
'cd-rom',
'exam',
'oracle',
'press'],
4
['clinical',
'ethic',
'practical',
'ethical',
'decision',
'clinical',
'medicine',
'seventh',
'lange',
'clinical',
'science'],
['understand', 'linguistics', 'teach', 'yourself'],
['harrison', 'principle', 'internal', 'medicine'],
['basic', 'clinical', 'pharmacology', 'lange', 'basic', 'science'],
['harrison',
'principle',
'internal',
'medicine',
'self-assessment',
'board',
'review'],
['strategic',
'database',
'marketing',
'masterplan',
'starting',
'managing',
'profitable',
'customer-based',
'marketing',
'program'],
['review',
'medical',
'microbiology',
'immunology',
'twelfth',
'lange',
'medical',
'book'],
['tintinalli',
'emergency',
'medicine',
'manual',
'emergency',
'medicine',
'tintinalli'],
['pharmacotherapy', 'pathophysiologic'],
['review', 'medical', 'microbiology', 'immunology'],
5
['computer', 'organization'],
['risk', 'management', 'insurance'],
['design', 'analog', 'cmos', 'integrated', 'circuit'],
['plant', 'design', 'economics', 'chemical', 'engineer'],
['fluid', 'mechanic', 'engineering'],
['operating', 'system', 'spiral'],
['anatomy', 'physiology'],
['database', 'management', 'system'],
['probability',
'statistic',
'principle',
'engineering',
'computing',
'science'],
['health', 'program', 'planning', 'educational', 'ecological'],
['geographic',
'information',
'system',
'forestry',
'natural',
'resource',
'management'],
['fluid', 'mechanic', 'chemical', 'engineer'],
['adaptation', 'studying', 'film', 'literature'],
['american', 'art', 'history', 'culture'],
['evolution', 'earth'],
['woman', 'study', 'gender', 'transnational', 'world'],
['aerodynamics'],
['forensics', 'criminalistics'],
['biological', 'evolution'],
['anatomy', 'physiology', 'integrative'],
['chemical',
'engineering',
'thermodynamics',
'chemical',
'engineering',
'thermodynamics'],
['human', 'geography'],
['theater', 'experience'],
['bioinformatics', 'computing', 'perspective'],
['experience', 'sociology'],
['electronic', 'principle'],
['earth', 'earth', 'science'],
['world', 'regional', 'geography'],
['physical', 'geology'],
['stern', 'introductory', 'plant', 'biology'],
['business', 'statistic', 'communicating'],
['digital', 'electronics', 'principle'],
6
['medical', 'insurance', 'pharmacy', 'technician'],
['insurance', 'medical', 'office', 'patient', 'payment'],
['law', 'ethic', 'health', 'profession'],
['foundation', 'microbiology'],
['microbiology', 'human', 'perspective'],
['programming', 'java', 'solving'],
['modern', 'computer', 'network', 'source'],
['business', 'driven', 'information', 'system'],
['real',
'estate',
'finance',
'investment',
'real',
'estate',
'finance',
'investment'],
['human', 'anatomy'],
['hole', 'human', 'anatomy', 'physiology'],
['seeley', 'principle', 'anatomy', 'physiology'],
['anatomy', 'physiology', 'unity', 'form', 'function'],
['seeley', 'anatomy', 'physiology'],
['hole', 'human', 'anatomy', 'physiology'],
['human', 'anatomy'],
['vander', 'human', 'physiology'],
['power', 'choice', 'political', 'science'],
['history', 'western', 'art'],
['reconstructing', 'gender'],
['mechanic', 'material'],
['fluid', 'mechanic'],
['microelectronic', 'circuit', 'design'],
['electric', 'circuit'],
['cmos', 'digital', 'integrated', 'circuit', 'analysis', 'design'],
['microelectronics', 'circuit', 'analysis', 'design'],
['computer', 'organization'],
['power', 'electronics'],
['theater', 'experience'],
['finance', 'theory'],
['investment', 'mcgraw-hill/irwin', 'finance', 'insurance', 'real', 'estate'],
['corporate',
'finance',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'estate'],
['understanding', 'human', 'sexuality'],
['biology'],
['ecology', 'concept'],
7
['environmental', 'science'],
['narrative', 'history', 'volume'],
['differential', 'equation', 'engineer', 'scientist'],
['elementary', 'statistic'],
['film', 'history'],
['film', 'art'],
['art', 'watching', 'film'],
['studying', 'religion'],
['aerodynamics'],
['vector', 'mechanic', 'engineer', 'static', 'dynamic'],
['thermodynamics', 'engineering'],
['simulation', 'modeling', 'analysis'],
['statistical', 'technique', 'business', 'economics'],
['business', 'statistic'],
['microbiology', 'clinical'],
['prescott', 'microbiology'],
['microbiology', 'system'],
['contemporary', 'nutrition'],
['chemistry', 'molecular', 'nature', 'matter', 'change'],
['chemistry'],
['chemistry'],
['principle', 'chemistry'],
['chemistry'],
['organic', 'chemistry'],
['chemistry', 'concept'],
['organic', 'biochemistry'],
['organic', 'chemistry'],
['organic', 'biological', 'chemistry'],
['business', 'communication', 'developing', 'leader', 'networked', 'world'],
['lesikar', 'business', 'communication', 'connecting', 'digital', 'world'],
['business', 'administrative', 'communication'],
['business', 'communication', 'building', 'critical', 'skill'],
['concept', 'biology'],
['biology', 'concept', 'investigation'],
['human', 'physiology'],
['seeley', 'anatomy', 'physiology'],
['mader', 'understanding', 'human', 'anatomy', 'physiology'],
['hole', 'human', 'anatomy', 'physiology'],
['advertising',
'promotion',
'integrated',
'marketing',
'communication',
'perspective'],
['sex', 'gender'],
['unfinished', 'nation', 'concise', 'history', 'american', 'people'],
['experiencing', 'world', 'religion'],
['chemistry', 'atom'],
8
['chemistry', 'atom'],
['economics'],
['economics'],
['economics', 'mcgraw-hill', 'economics'],
['urban', 'economics'],
['environmental', 'economics', 'mcgraw-hill'],
['broadcasting',
'cable',
'internet',
'beyond',
'modern',
'electronic',
'medium'],
['pathway', 'astronomy'],
['college', 'physic', 'integrated', 'force', 'kinematics'],
['astronomy', 'journey', 'cosmic', 'frontier'],
['physic', 'everyday', 'phenomenon'],
['exploration', 'astronomy'],
['pathway', 'astronomy'],
['medical', 'insurance', 'integrated', 'claim', 'process'],
['university', 'physic', 'modern', 'physic'],
['physical', 'science'],
['physic', 'everyday', 'phenomenon'],
['theater', 'lively', 'art'],
['anthology', 'living', 'theater'],
['survey', 'operating', 'system'],
['survey', 'operating', 'system'],
['basic', 'statistic', 'business', 'economics'],
['applied', 'statistic', 'business', 'economics'],
['business',
'statistic',
'practice',
'mcgraw-hill/irwin',
'operation',
'decision',
'science'],
['business', 'research', 'method'],
['microbiology', 'system'],
['foundation', 'microbiology'],
['wardlaw', 'perspective', 'nutrition'],
['contemporary', 'world', 'regional', 'geography'],
['geography'],
['geography'],
['chemistry', 'context'],
['international', 'economics'],
['economics', 'basic', 'mcgraw-hill/irwin', 'economics'],
['labor', 'economics'],
['managerial', 'economics', 'business', 'strategy'],
9
['computer', 'network', 'top'],
['object-oriented', 'programming', 'java'],
['database', 'system', 'concept'],
['java', 'programming', 'ground'],
['algorithm'],
['earth', 'earth', 'science'],
['environmental', 'geology'],
['exploring', 'geology'],
['laboratory', 'manual', 'physical', 'geology'],
['marine', 'biology'],
['vertebrate', 'comparative', 'anatomy', 'function', 'evolution'],
['understanding', 'business'],
['employment', 'law', 'business'],
['business', 'law'],
['matching', 'supply', 'demand', 'operation', 'management'],
['molecular', 'biology'],
['human', 'biology'],
['biology'],
['biology'],
['concept', 'biology'],
['biology', 'concept', 'investigation'],
['human', 'anatomy'],
['electronic', 'medium'],
['medium', 'ethic', 'issue'],
['power', 'choice', 'political', 'science'],
['music', 'appreciation'],
['world', 'music', 'tradition', 'transformation'],
['sociology'],
['foundation', 'material', 'science', 'engineering'],
['shigley',
'mechanical',
'engineering',
'design',
'mcgraw-hill',
'mechanical',
'engineering'],
['thermodynamics', 'engineering'],
['engineering', 'circuit', 'analysis'],
['semiconductor', 'physic', 'device'],
['international', 'marketing'],
['consumer', 'behavior', 'building', 'marketing', 'strategy'],
['personal',
'finance',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'estate'],
10
['corporate',
'finance',
'core',
'principle',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'estate'],
['personal', 'finance'],
['sexuality'],
['transformation', 'woman', 'gender', 'psychology'],
['human', 'sexuality', 'self', 'society', 'culture'],
['biology'],
['biology', 'laboratory', 'manual'],
['calculus', 'transcendental', 'function'],
['applied',
'calculus',
'business',
'economics',
'social',
'life',
'science',
'expanded'],
['calculus', 'business', 'economics', 'social', 'life', 'science', 'version'],
['ecology', 'global', 'insight', 'investigation'],
['ecology', 'concept'],
['principle', 'environmental', 'science', 'inquiry'],
['film', 'art'],
['scripture', 'world', 'religion'],
['ethical',
'issue',
'modern',
'medicine',
'contemporary',
'reading',
'bioethics'],
['philosophy'],
['thermodynamics', 'heat', 'transfer', 'ee', 'software'],
['earth', 'earth', 'science'],
['unfinished',
'nation',
'concise',
'history',
'american',
'people',
'volume'],
['foundation', 'microbiology', 'basic', 'principle'],
11
['exploration', 'astronomy'],
['organic', 'chemistry'],
['thermodynamics', 'engineering', 'student', 'resource', 'dvd'],
['american', 'history', 'survey', 'volume'],
['american', 'history', 'survey', 'volume'],
['vector', 'mechanic', 'engineer', 'static'],
['vector', 'mechanic', 'engineer', 'dynamic'],
['unfinished',
'nation',
'concise',
'history',
'american',
'people',
'volume'],
['thermal-fluid', 'science', 'student', 'resource', 'dvd'],
['fluid', 'mechanic', 'student', 'dvd'],
['music', 'appreciation'],
['loose-leaf', 'financial', 'accounting'],
['foundation',
'financial',
'management',
'time',
'value',
'money',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'estate'],
['elementary', 'statistic', 'step', 'step', 'formula', 'data'],
['principle', 'organic', 'biochemistry', 'connect'],
['microbiology', 'system', 'connect'],
['ecology', 'global', 'insight', 'investigation', 'connect'],
['loose-leaf', 'understanding', 'business'],
['corporate',
'finance',
'alternate',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'estate'],
['biology'],
['loose', 'leaf', 'version', 'anatomy', 'physiology', 'integrative'],
['loose', 'leaf', 'version', 'human', 'anatomy', 'physiology'],
['loose', 'leaf', 'biology'],
['anatomy',
'physiology',
12
'unity',
'form',
'function',
'connect',
'plus/learnsmart',
'semester',
'includes',
'apr',
'phils',
'online'],
['loose-leaf', 'principle', 'corporate', 'finance'],
['experience', 'music', 'audio', 'cd'],
['loose', 'leaf', 'version', 'exploring', 'geology'],
['loose', 'leaf', 'organic', 'biochemistry'],
['loose', 'leaf', 'version', 'human', 'physiology'],
['elementary', 'statistic', 'data', 'formula'],
['electronics', 'principle', 'student', 'data', 'cd-rom'],
['biology', 'connect'],
['exploring', 'geology', 'connect'],
['music', 'appreciation', 'connect', 'upgrade'],
['international', 'business', 'challenge', 'global', 'competition'],
['intermediate', 'accounting', 'annual', 'report'],
['microbiology', 'clinical', 'connect', 'learnsmart', 'semester'],
['loose-leaf', 'organic', 'chemistry'],
['benson',
'microbiological',
'complete',
'version',
'brown',
'microbioligical'],
['film', 'art', 'connect'],
['practical',
'business',
'math',
'procedure',
'handbook',
'student',
'dvd',
'wsj',
'insert'],
['loose', 'leaf', 'cost', 'accounting', 'connect'],
['foundation', 'microbiology', 'basic', 'principle'],
['seeley', 'anatomy', 'physiology', 'connect'],
['loose', 'leaf', 'version', 'environmental', 'geology'],
['loose', 'leaf', 'version', 'environmental', 'geology'],
['exploring', 'social', 'psychology'],
['music', 'appreciation'],
['marketing'],
13
['marketing', 'marketing', 'strategy', 'planning'],
['marketing', 'management'],
['preface', 'marketing', 'management'],
['marketing'],
['personal',
'finance',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'est'],
['finance', 'theory'],
['study',
'finance',
'managing',
'corporate',
'value',
'creation',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'estate'],
['health', 'psychology'],
['international', 'accounting'],
['ethical', 'obligation', 'decision', 'accounting', 'text'],
['advanced', 'accounting'],
['advanced', 'accounting'],
['combo', 'wardlaw', 'perspective', 'nutrition', 'connect', 'semester'],
['anatomy',
'physiology',
'integrative',
'connect',
'plus/learnsmart',
'semester',
'card/apr',
'online',
'access/phils',
'online'],
['statistical', 'technique', 'business', 'economics'],
['business', 'statistic'],
['business', 'statistic'],
['nutritional', 'assessment'],
['human', 'geography', 'landscape', 'human', 'activity'],
['international', 'economics', 'mcgraw-hill', 'economics'],
['public', 'finance'],
['economics', 'mcgraw-hill', 'economics'],
['managerial',
14
'economics',
'foundation',
'business',
'analysis',
'strategy',
'mcgraw-hill',
'economics'],
['economics'],
['economics', 'principle', 'policy', 'mcgraw-hill', 'economics'],
['contemporary', 'labor', 'economics', 'mcgraw-hill', 'economics'],
['exercise', 'physiology', 'theory', 'fitness', 'performance'],
['concept', 'physical', 'fitness', 'active', 'lifestyle', 'wellness'],
['physical', 'education', 'exercise', 'science', 'sport', 'study'],
['vertebrate', 'comparative', 'anatomy', 'function', 'evolution'],
['business', 'changing', 'world'],
['law', 'business'],
['biology'],
['power', 'choice', 'political', 'science'],
['music', 'appreciation'],
['music', 'appreciation'],
['music', 'appreciation'],
['accounting', 'mean'],
['financial', 'statement', 'analysis', 'security', 'valuation'],
['managerial', 'accounting'],
['managerial', 'accounting', 'manager'],
['cost', 'accounting'],
['financial', 'accounting'],
['auditing',
'accounting',
'investigating',
'issue',
'fraud',
'professional',
'ethic'],
['advanced', 'financial', 'accounting'],
['financial', 'managerial', 'accounting'],
['criminology'],
['sociology'],
['sociology'],
['sociology', 'core'],
['mapping', 'social', 'landscape', 'reading', 'sociology'],
['sociology', 'module'],
['practical', 'skeptic', 'core', 'concept', 'sociology'],
['practical', 'skeptic', 'reading', 'sociology'],
['social',
'construction',
'difference',
'inequality',
15
'race',
'class',
'gender',
'sexuality'],
['sociology', 'matter'],
['sociological', 'theory'],
['applied', 'circuit', 'analysis'],
['design', 'operational', 'amplifier', 'analog', 'integrated', 'circuit'],
['analog', 'circuit', 'design', 'discrete', 'integrated'],
['marketing', 'management', 'strategic', 'decision-making'],
['marketing', 'research'],
['marketing', 'management'],
['marketing'],
['advertising'],
['marketing', 'core'],
['marketing', 'strategy', 'decision-focused'],
['basic', 'marketing', 'marketing', 'strategy', 'planning'],
['marketing'],
['business', 'management', 'entrepreneur', 'guidebook'],
['international', 'business'],
['global', 'marketing', 'contemporary', 'theory', 'practice'],
['entrepreneurial', 'business'],
['business',
'ethic',
'decision-making',
'personal',
'integrity',
'social',
'responsibility'],
['business', 'society', 'stakeholder', 'ethic', 'public', 'policy'],
['corporate',
'finance',
'standard',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'estate'],
['corporate',
'finance',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'esta'],
['financial',
'market',
'institution',
16
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'estate'],
['analysis', 'financial', 'management'],
['corporate', 'finance'],
['principle',
'corporate',
'finance',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'estate',
'mcgraw-hill/irwin',
'finance',
'insureance',
'real',
'estate'],
['corporate', 'finance'],
['focus',
'personal',
'finance',
'active',
'help',
'you',
'develop',
'successful',
'financial',
'skill',
'mcgraw-hill/irwin',
'finance',
'insurance',
'real',
'esta'],
['core', 'concept', 'cultural', 'anthropology'],
['magic', 'witchcraft', 'religion', 'reader', 'anthropology', 'religion'],
['human', 'specie', 'biological', 'anthropology'],
['cultural', 'anthropology', 'appreciating', 'cultural', 'diversity'],
['anthropology', 'appreciating', 'human', 'diversity'],
['mirror', 'humanity', 'concise', 'cultural', 'anthropology'],
['introducing', 'anthropology', 'integrated'],
['psychology'],
['psychology', 'perspective', 'connection'],
['understanding', 'psychology'],
['understanding', 'psychology'],
['social', 'psychology'],
17
['psychological', 'testing', 'assessment', 'test', 'measurement'],
['human', 'sexuality', 'diversity', 'contemporary', 'america'],
['experience', 'psychology'],
['personality', 'psychology', 'domain', 'knowledge', 'human', 'nature'],
['abnormal', 'psychology'],
['understanding', 'human', 'sexuality'],
['science', 'psychology', 'appreciative', 'view'],
['looseleaf', 'sexuality'],
['communicating', 'strategy', 'success', 'business', 'profession'],
['basic', 'reading', 'moral', 'philosophy'],
['element', 'moral', 'philosophy'],
['experiencing',
'world',
'religion',
'loose',
'leaf',
'tradition',
'challenge',
'change'],
['business', 'ethic'],
['philosophy', 'power', 'idea'],
['doe', 'center', 'hold', 'western', 'philosophy'],
['moral', 'story', 'ethic'],
['physical', 'education', 'fitness', 'sport'],
['environmental', 'geology'],
['laboratory', 'manual', 'physical', 'geology'],
['biology'],
['managerial', 'accounting'],
['experiencing', 'race', 'class', 'gender', 'united'],
['meaning',
'difference',
'american',
'construction',
'race',
'sex',
'gender',
'social',
'class',
'sexual',
'orientation',
'disability'],
['service', 'marketing'],
['global', 'business'],
['business', 'government', 'society', 'managerial', 'perspective'],
['culture', 'sketch', 'study', 'anthropology'],
['scripture', 'world', 'religion'],
['elementary', 'statistic', 'step', 'step', 'formula'],
['forensic', 'accounting'],
18
['world', 'history', 'texas'],
['business', 'management', 'real-world', 'connection'],
['bscs', 'biology', 'molecular'],
['aerodynamics', 'engineering', 'student', 'sixth'],
['real', 'analysis', 'workbook', 'solution'],
['principle', 'real', 'analysis'],
['probability', 'measure', 'theory'],
['computer',
'organization',
'design',
'companion',
'morgan',
'kaufmann',
'computer',
'architecture',
'design'],
['probability', 'statistic', 'engineer', 'scientist'],
['computer', 'organization', 'design', 'hardware/software', 'interface'],
['digital',
'evidence',
'computer',
'crime',
'forensic',
'science',
'computer',
'internet'],
['computer', 'organization', 'design'],
['advanced', 'topic', 'forensic', 'dna', 'typing'],
['computer',
'organization',
'design',
'revised',
'hardware/software',
'interface',
'morgan',
'kaufmann',
'computer',
'architecture',
'design'],
['data',
'mining',
'practical',
'machine',
'learning',
'tool',
'technique',
'morgan',
'kaufmann',
19
'data',
'management',
'system'],
['forensic', 'science'],
['probability', 'model', 'tenth'],
['molecular', 'biology', 'understanding', 'genetic', 'revolution'],
['differential', 'equation', 'dynamical', 'system', 'chaos'],
['security', 'ninth'],
['computer', 'network', 'system'],
['molecular', 'biology', 'technique', 'classroom', 'laboratory', 'manual'],
['forestry', 'natural', 'resource'],
['computer', 'machine', 'vision', 'theory', 'algorithm', 'practicality'],
['probability', 'random', 'process', 'signal', 'processing', 'communication'],
['course', 'real', 'analysis'],
['computer',
'organization',
'design',
'fifth',
'hardware/software',
'interface',
'morgan',
'kaufmann',
'computer',
'architecture',
'design'],
['probability', 'model', 'eleventh'],
['homeland', 'security', 'principle', 'all-hazards', 'response'],
['semi-riemannian', 'geometry', 'relativity'],
['electronic', 'commerce', 'managerial', 'perspective'],
['microprocessor', 'hardware', 'software', 'principle'],
['soul', 'cinema', 'appreciation', 'film', 'music'],
['computer', 'organization', 'architecture', 'designing', 'performance'],
['microprocessor', 'programming', 'interfacing', 'software', 'hardware'],
['real', 'analysis'],
['differential', 'equation', 'linear', 'algebra'],
['musical', 'theater', 'appreciation'],
['microprocessor', 'microcomputer', 'hardware', 'software'],
['literature', 'latin', 'america', 'antiquity'],
['applied',
'partial',
'differential',
'equation',
'fourier',
'boundary',
'value'],
['wildlife', 'ecology', 'management'],
['forest',
'ecology',
20
'foundation',
'sustainable',
'forest',
'management',
'environmental',
'ethic',
'forestry'],
['digital+microprocessor'],
['data', 'structure', 'c++', 'using', 'stl'],
['digital', 'integrated', 'circuit', 'design', 'perspective'],
['microprocessor', 'programming', 'interfacing', 'software', 'hardware'],
['speech',
'language',
'processing',
'natural',
'language',
'processing',
'computational',
'linguistics',
'speech',
'recognition'],
['structured', 'computer', 'organization'],
['power', 'electronics', 'circuit', 'device'],
['programming', 'language', 'ansi', 'version'],
['quantum', 'mechanic'],
['digital',
'electronics',
'laboratory',
'experiment',
'using',
'xilinx',
'cpld',
'xilinx',
'foundation',
'design',
'simulation',
'software',
'design',
'simulation',
'software'],
['thermodynamics', 'heat', 'power'],
['forensic', 'science', 'crime', 'scene', 'crime', 'lab'],
['risk', 'management', 'insurance', 'prentice', 'hall', 'finance'],
['concept', 'programming', 'language'],
['parallel',
'programming',
'technique',
'using',
21
'networked',
'workstation',
'parallel',
'computer'],
['adts', 'data', 'structure', 'solving', 'c++'],
['operating', 'system', 'design', 'implementation'],
['differential', 'equation'],
['real', 'analysis'],
['business',
'law',
'legal',
'e-commerce',
'ethical',
'global',
'environment'],
['probability', 'stochastic', 'process'],
['probability', 'random', 'process'],
['circuit', 'design', 'theory'],
['partial', 'differential', 'equation', 'fourier', 'boundary', 'value'],
['structured', 'computer', 'organization'],
['principle', 'computer', 'organization', 'assembly', 'language'],
['physic', 'scientist', 'engineer', 'modern', 'physic'],
['computing', 'programming', 'java', 'multimedia'],
['solid', 'electronic', 'device'],
['sacred', 'path', 'understanding', 'religion', 'world'],
['motorola',
'microprocessor',
'family',
'assembly',
'language',
'interface',
'design',
'system',
'design'],
['linguistic', 'perspective', 'language', 'education'],
['principle', 'data', 'network', 'computer', 'communication'],
['listening', 'music'],
['cognition', 'psychology', 'mind', 'brain'],
['history', 'eastern', 'art'],
['testament', 'greek', 'beginner'],
['differential', 'equation', 'linear', 'algebra'],
['database', 'system'],
['political', 'science'],
['calculus', 'graphical', 'numerical', 'algebraic'],
['marketing', 'management'],
['cost', 'accounting'],
['computer', 'network'],
['mechanical', 'vibration'],
22
['programming',
'using',
'visual',
'basic',
'pearson',
'custom',
'computer',
'science'],
['data', 'structure', 'object', 'using', 'c++'],
['database', 'processing'],
['electronic', 'commerce', 'managerial', 'social', 'network', 'perspective'],
['international', 'economics', 'theory', 'policy'],
['strategic',
'management',
'business',
'policy',
'achieving',
'sustainability'],
['crisis',
'intervention',
'criminal',
'justice',
'response',
'chaos',
'mayhem',
'disorder'],
['java', 'solving', 'programming', 'myprogramminglab'],
['balanced', 'computer', 'science'],
['reinforced', 'concrete', 'mechanic', 'design'],
['marketing', 'real', 'people', 'real', 'choice'],
['marketing', 'defined', 'explained', 'applied'],
['technique', 'microbiology', 'student', 'handbook'],
['aerodynamics', 'engineer'],
['physic', 'scientist', 'engineer', 'modern', 'physic'],
['physic', 'scientist', 'engineer', 'chap'],
['operating', 'system', 'internals', 'design', 'principle'],
['design', 'analysis', 'algorithm'],
['forensic', 'psychology'],
['security', 'computing'],
['international',
'monetary',
'financial',
'economics',
'pearson',
'economics'],
['framework', 'marketing', 'management'],
['electronic', 'device', 'conventional', 'current', 'version'],
['accounting', 'information', 'system'],
23
['cost', 'accounting', 'student', 'value'],
['advanced', 'accounting'],
['computer', 'science', 'overview'],
['java', 'program', 'late', 'object'],
['java', 'program'],
['database', 'system', 'database', 'data', 'warehouse'],
['data', 'structure', 'object', 'using', 'java'],
['data', 'structure', 'algorithm', 'analysis', 'java'],
['neuro-fuzzy',
'soft',
'computing',
'computational',
'learning',
'machine',
'intelligence'],
['educational', 'psychology'],
['convergence',
'race',
'ethnicity',
'gender',
'multiple',
'identity',
'counseling',
'merrill',
'counseling'],
['electronic',
'health',
'record',
'understanding',
'using',
'computerized',
'medical',
'record'],
['electronic', 'device', 'circuit', 'theory'],
['modern', 'database', 'management'],
['hinduism'],
['contemporary',
'business',
'online',
'commerce',
'law',
'legal',
'internet',
'ethical',
'global',
'environment'],
['programming', 'world', 'wide', 'web'],
['computer', 'forensics', 'cyber', 'crime'],
24
['chemical',
'engineering',
'thermodynamics',
'prentice',
'hall',
'international',
'physical',
'chemical',
'engineering',
'science'],
['global', 'marketing'],
['e-commerce'],
['economics',
'money',
'banking',
'financial',
'market',
'business',
'school',
'pearson',
'economics'],
['database', 'concept'],
['business', 'data', 'network', 'security'],
['multinational', 'business', 'finance', 'pearson', 'finance'],
['bond', 'market', 'analysis', 'strategy'],
['marketing'],
['statistic', 'business', 'economics'],
['strategic', 'marketing'],
['management', 'science'],
['career',
'fitness',
'program',
'exercising',
'option',
'career',
'fitness',
'program',
'exercising',
'option',
'sukiennik',
'diane',
'author',
'paperback'],
['economics', 'money', 'banking', 'financial', 'market'],
['economics', 'health', 'health', 'care'],
['computer', 'security'],
['marketing', 'hospitality', 'tourism'],
['analyzing',
25
'computer',
'security',
'threat',
'vulnerability',
'countermeasure'],
['starting', 'programming', 'logic', 'design'],
['pharmacology', 'connection', 'nursing', 'practice'],
['speed', 'signaling', 'jitter', 'modeling', 'analysis', 'budgeting'],
['absolute', 'java'],
['aerodynamics', 'engineer'],
['data', 'structure', 'algorithm', 'analysis', 'c++'],
['ethic', 'information', 'age'],
['starting', 'java', 'control', 'structure', 'object'],
['computer', 'networking', 'top-down'],
['legal', 'environment', 'business', 'online', 'commerce'],
['business', 'law'],
['hospital',
'operation',
'principle',
'efficiency',
'health',
'care',
'press',
'operation',
'management'],
['engineering', 'mechanic', 'dynamic'],
['engineering', 'mechanic', 'static'],
['structured', 'computer', 'organization'],
['computing', 'programming', 'python'],
['computer',
'organization',
'architecture',
'william',
'stalling',
'book',
'computer',
'data',
'communication'],
['java', 'programming', 'comprehensive', 'version'],
['economics', 'micro', 'view'],
['international', 'economics', 'pearson', 'economics'],
['economics', 'principle', 'tool'],
['e-marketing'],
['criminology', 'justice'],
['business', 'communication'],
['business', 'communication'],
['criminological', 'theory'],
['starting',
26
'java',
'control',
'structure',
'object',
'myprogramminglab',
'pearson'],
['program'],
['international', 'business', 'reality'],
['corporate', 'finance'],
['principle', 'risk', 'management', 'insurance', 'pearson', 'finance'],
['future', 'option', 'market'],
['foundation', 'finance', 'pearson', 'finance'],
['engineering', 'mechanic', 'static', 'masteringengineering', 'pearson'],
['economics'],
['managerial', 'economics'],
['business', 'statistic'],
['principle', 'economics'],
['business',
'data',
'communications-',
'infrastructure',
'networking',
'security'],
['e-commerce'],
['pharmacology', 'nurse', 'pathophysiologic'],
['database', 'processing', 'design', 'implementation'],
['statistic', 'manager', 'using', 'microsoft', 'excel'],
['international', 'business', 'challenge', 'globalization'],
['marketing', 'research'],
['principle', 'marketing'],
['corporate', 'finance', 'core'],
['comprehensive',
'health',
'insurance',
'billing',
'coding',
'reimbursement',
'student',
'workbook'],
['java', 'software', 'structure', 'designing', 'using', 'data', 'structure'],
['mechanic', 'material'],
['neural', 'network', 'architecture', 'algorithm'],
['cryptography', 'network', 'security', 'principle', 'practice'],
['network', 'security', 'standard'],
['java', 'foundation'],
['criminology', 'integrative'],
['e-commerce'],
['computer', 'network', 'internet'],
27
['prentice', 'hall', 'earth', 'science'],
['computer', 'science', 'overview'],
['sociology'],
['operating', 'system', 'internals', 'design', 'principle'],
['history', 'dance', 'art', 'education'],
['hinduism', 'cultural', 'perspective'],
['wildlife',
'forest',
'forestry',
'principle',
'managing',
'forest',
'biological',
'diversity'],
['intel', 'microprocessor'],
['technical', 'communication', 'twenty-first', 'century'],
['criminalistics', 'forensic', 'science'],
['electronics', 'circuit', 'device'],
['principle', 'electric', 'circuit', 'conventional', 'current', 'version'],
['core', 'concept', 'pharmacology', 'mynursingkit'],
['contemporary',
'clinical',
'immunology',
'serology',
'pearson',
'clinical',
'laboratory',
'science'],
['criminology', 'sociological', 'understanding'],
['homeland', 'security'],
['health', 'physical', 'assessment', 'nursing'],
['understanding', 'pharmacology', 'health', 'profession'],
['doe', 'earth', 'physical', 'geology', 'process', 'science'],
['differential', 'equation', 'computing', 'modeling'],
['elementary', 'differential', 'equation', 'boundary', 'value'],
['course', 'database', 'system'],
['e-commerce', 'business', 'technology', 'society'],
['modern', 'operating', 'system'],
['industrial', 'ecology', 'sustainable', 'engineering'],
['contemporary', 'business', 'online', 'commerce', 'law'],
['earth', 'science'],
['christian', 'theological', 'tradition'],
['course', 'probability'],
['managing', 'business', 'process', 'flow'],
['motorola',
'microprocessor',
'family',
'assembly',
28
'language',
'interface',
'design',
'system',
'design'],
['differential', 'equation', 'linear', 'algebra'],
['art', 'history'],
['solving', 'data', 'structure', 'using', 'java', 'multimedia'],
['computer', 'network', 'internet'],
['history', 'modern', 'art'],
['introductory',
'chemical',
'engineering',
'thermodynamics',
'prentice',
'hall',
'international',
'physical',
'chemical',
'engineering',
'science'],
['hcs', 'microprocessor'],
['computer', 'organization', 'architecture', 'designing', 'performance'],
['marketing', 'management'],
['computer', 'networking', 'top-down'],
['marketing', 'research', 'applied', 'orientation'],
['contemporary', 'direct', 'interactive', 'marketing'],
['database', 'system'],
['electronic', 'commerce'],
['data', 'structure', 'abstraction', 'java'],
['service', 'marketing'],
['electronic', 'commerce', 'spectrum', 'book'],
['electric', 'circuit', 'masteringengineering'],
['principle',
'risk',
'management',
'insurance',
'prentice',
'hall',
'finance'],
['history', 'art', 'africa'],
['physic', 'scientist', 'engineer', 'modern', 'physic'],
['network', 'flow', 'theory', 'algorithm'],
['natural', 'language', 'processing', 'prolog', 'programmer'],
['educational', 'psychology', 'theory', 'practice', 'myeducationlab'],
['mastering',
'method',
'differentiated',
29
'instruction',
'culturally',
'linguistically',
'diverse',
'cld',
'student',
'myeducationkit'],
['criminology', 'integrative', 'mycrimekit'],
['core', 'java', 'volume', 'sun', 'core'],
['introductory', 'circuit', 'analysis'],
['linguistics', 'non-linguists', 'primer', 'exercise'],
['esol',
'teacher',
'classroom',
'teacher',
'linguistically',
'culturally',
'ethnically',
'diverse',
'student'],
['quantum',
'mechanic',
'engineering',
'material',
'science',
'applied',
'physic'],
['e-commerce'],
['international', 'economics', 'theory', 'policy', 'student', 'value'],
['islam', 'mysearchlab', 'religion'],
['life', 'health', 'insurance'],
['quantum', 'mechanic', 'chemistry'],
['molecular', 'thermodynamics', 'fluid-phase', 'equilibrium'],
['origin', 'virtue', 'human', 'instinct', 'evolution', 'cooperation'],
['rise', 'fall', 'athens', 'nine', 'greek', 'life'],
['evolution', 'true'],
['dramaturgy', 'american', 'theater', 'source', 'book', 'source', 'book'],
['basic', 'judaism'],
['regional',
'farm',
'bill',
'field',
'hearing',
'redmond',
'oregon',
'hearing',
'committee',
'agriculture',
30
'nutrition',
'forestry',
'united',
'senate',
'hundred',
'ninth',
'congress',
'session',
'august'],
['analytical',
'mechanic',
'relativity',
'quantum',
'mechanic',
'oxford',
'graduate',
'text'],
['hinduism', 'short'],
['judaism', 'short'],
['foundation', 'buddhism'],
['history', 'practice', 'ancient', 'astronomy'],
['oxford', 'history', 'islam'],
['testament', 'reader', 'christianity'],
['classical', 'greek', 'reader'],
['challenge', 'ethic', 'environmentalism', 'feminism', 'multiculturalism'],
['game', 'theory'],
['islamic', 'threat', 'myth', 'reality'],
['anne', 'orthwood', 'bastard', 'sex', 'law', 'virginia'],
['athenaze', 'ancient', 'greek'],
['theorizing', 'feminism', 'reader'],
['testament', 'christian', 'writing', 'reader'],
['eight', 'theory', 'religion'],
['islam', 'straight', 'path'],
['playing', 'real', 'text', 'game', 'theory'],
['oxford', 'encyclopedia', 'islamic', 'world', 'six-volume', 'set'],
['buddhism'],
['doe', 'feminism', 'discriminate', 'debate'],
['game', 'theory', 'international'],
['microelectronic',
'circuit',
'oxford',
'electrical',
'computer',
'engineering'],
['film', 'theory', 'criticism'],
['magic', 'witchcraft', 'ghost', 'greek', 'roman', 'world', 'sourcebook'],
['vold', 'theoretical', 'criminology'],
['democracy',
31
'latin',
'america',
'political',
'change',
'comparative',
'perspective'],
['anthropology', 'doe', 'mean', 'human'],
['wave', 'oscillation', 'prelude', 'quantum', 'mechanic'],
['islam', 'straight', 'path'],
['conversation', 'historical', 'philosophy'],
['oxford', 'handbook', 'judaism', 'economics'],
['english', 'language', 'linguistic', 'history'],
['concise', 'world', 'religion'],
['chinese', 'south-east', 'asian', 'white', 'ware', 'found', 'philippine'],
['role',
'government',
'east',
'asian',
'economic',
'development',
'comparative',
'institutional',
'analysis'],
['chaos', 'nonlinear', 'dynamic', 'scientist', 'engineer'],
['chaos', 'time-series', 'analysis'],
['proboscidea',
'evolution',
'palaeoecology',
'elephant',
'relative',
'oxford',
'science',
'publication'],
['probability', 'random', 'process'],
['introducing', 'einstein', 'relativity'],
['dictionary', 'hinduism'],
['latin', 'dictionary', 'founded', 'andrew', 'freud', 'latin', 'dictionary'],
['greek-english', 'lexicon'],
['deuteronomy', 'judaean', 'diaspora'],
['feminism', 'film'],
['intermediate', 'greek-english', 'lexicon'],
['biochemistry', 'molecular', 'biology'],
['building', 'bioinformatics', 'solution', 'perl', 'mysql'],
['relativity', 'einstein', 'equation'],
['feminism', 'issue', 'argument'],
['canon', 'criterion', 'christian', 'theology', 'father', 'feminism'],
['evolution'],
['woman', 'indian', 'buddhism', 'comparative', 'textual', 'study'],
32
['cultural', 'anthropology', 'perspective', 'human', 'condition'],
['molecular', 'quantum', 'mechanic'],
['molecular', 'biology', 'principle', 'genome', 'function'],
['relativity',
'gravitation',
'cosmology',
'basic',
'oxford',
'master',
'physic'],
['molecular', 'biology', 'cancer', 'mechanism', 'target', 'therapeutic'],
['einstein',
'physic',
'atom',
'quantum',
'relativity',
'derived',
'explained',
'appraised'],
['medicinal', 'chemistry'],
['biochemistry', 'molecular', 'basis', 'life'],
['ecoimmunology'],
['invitation', 'world', 'religion'],
['gender', 'prism', 'difference'],
['testament', 'historical', 'christian', 'writing'],
['world', 'religion'],
['introducing', 'philosophy', 'text', 'integrated', 'reading'],
['crime', 'criminology'],
['ethic', 'history', 'theory', 'contemporary', 'issue'],
['exploring', 'philosophy', 'introductory', 'anthology'],
['philosophy', 'classical', 'contemporary', 'reading'],
['criminological', 'theory', 'evaluation'],
['christ',
'messiah',
'christ',
'language',
'paul',
'messiah',
'language',
'ancient',
'judaism'],
['talon', 'eagle', 'latin', 'america', 'united', 'world'],
['marine', 'biology', 'function', 'biodiversity', 'ecology'],
['american', 'popular', 'music'],
['physical', 'geography', 'global', 'environment'],
['buddhism', 'introducing', 'buddhist', 'experience'],
['oxford', 'latin', 'course', 'college', 'grammar', 'exercise', 'context'],
['oxford', 'latin', 'course', 'college', 'reading', 'vocabulary'],
33
['colonial', 'latin', 'america'],
['gendered', 'society'],
['gendered', 'society', 'reader'],
['josephus', 'theology', 'ancient', 'judaism'],
['modern', 'latin', 'america'],
['moral', 'life', 'introductory', 'reader', 'ethic', 'literature'],
['reading', 'greek', 'history', 'source', 'interpretation'],
['philosophy', 'quest', 'truth'],
['advanced', 'quantum', 'mechanic'],
['mechanic', 'thermodynamics', 'propulsion'],
['genetic', 'algorithm', 'search', 'optimization', 'machine', 'learning'],
['exploring', 'black', 'hole', 'relativity'],
['real', 'analysis', 'course'],
['modern', 'quantum', 'mechanic'],
['nonlinear', 'dynamics+chaos'],
['concrete', 'mathematics', 'foundation', 'computer', 'science'],
['computer', 'system', 'organization', 'architecture'],
['design', 'implementation', 'freebsd', 'operating', 'system'],
['course', 'probability'],
['art', 'computer', 'programming', 'volume', 'algorithm'],
['literature', 'reading', 'writing'],
['agenda',
'alternative',
'public',
'policy',
'update',
'epilogue',
'health',
'care',
'longman',
'classic',
'political',
'science'],
['greek', 'art', 'archaeology'],
['gender', 'psychological', 'perspective'],
['human', 'sexuality', 'mydevelopmentlab'],
['art', 'history'],
['business', 'ethic', 'concept'],
['jew', 'christian', 'muslim', 'comparative', 'monotheistic', 'religion'],
['medium', 'ethic', 'moral', 'reasoning'],
['english', 'linguistic'],
['bible'],
['concise', 'linguistics'],
['latin', 'america', 'people', 'combined', 'volume'],
['short', 'writing', 'biology'],
['vision', 'america', 'history', 'united', 'volume'],
['sociology', 'project', 'introducing', 'sociological', 'imagination'],
['sociology', 'down-to-earth'],
34
['sociology'],
['biological', 'anthropology'],
['backpack', 'literature', 'fiction', 'poetry', 'drama', 'writing'],
['religion', 'world'],
['history', 'world', 'religion'],
['psychology'],
['social', 'psychology', 'united'],
['core', 'question', 'philosophy', 'text', 'reading', 'mythinkinglab'],
['forensic', 'psychology'],
['human', 'sexuality'],
['human', 'sexuality', 'paper'],
['living', 'religion'],
['literature', 'fiction', 'poetry', 'drama', 'writing'],
['human', 'evolution', 'culture', 'highlight', 'anthropology'],
['conformity',
'conflict',
'reading',
'cultural',
'anthropology',
'myanthrolab'],
['short', 'writing', 'film'],
['art', 'history', 'myartslab'],
['physiology', 'behavior'],
['janson', 'basic', 'history', 'western', 'art', 'history', 'art'],
['statistic', 'psychology'],
['history', 'modern', 'art', 'paperback'],
['history', 'modern', 'art', 'volume'],
['human',
'biology',
'social',
'worker',
'development',
'ecology',
'genetics',
'health'],
['anthropology', 'latin', 'america', 'caribbean'],
['understanding', 'music'],
['head', 'broadcasting', 'america', 'survey', 'electronic', 'medium'],
['writing', 'political', 'science', 'practical'],
['history', 'film'],
['religion', 'matter', 'sociology', 'teach', 'religion', 'world'],
['literature'],
['visual', 'art', 'history', 'revised'],
['janson', 'history', 'art', 'western', 'tradition'],
['janson', 'history', 'art', 'western', 'tradition', 'volume', 'myartslab'],
['janson', 'history', 'art', 'western', 'tradition', 'volume'],
['flashback', 'film', 'history'],
['greek', 'history', 'culture', 'society'],
35
['history',
'italian',
'renaissance',
'art',
'paper',
'cover',
'mysearchlab',
'art'],
['photography',
'cultural',
'history',
'mysearchlab',
'art',
'mysearchlab',
'art'],
['latin', 'america', 'interpretive', 'history'],
['world', 'history'],
['consider',
'source',
'document',
'latin',
'american',
'history',
'interpretive',
'history'],
['sociology', 'religion', 'reader'],
['anthropology', 'religion', 'magic', 'witchcraft'],
['ultimate', 'question', 'thinking', 'philosophy'],
['seeing',
'ourselves',
'classic',
'contemporary',
'cross-cultural',
'reading',
'sociology'],
['anthropology', 'myanthrolab'],
['art', 'history', 'volume', 'myartslab'],
['art', 'history', 'volume', 'myartslab'],
['art', 'history', 'combined', 'myartslab'],
['woman',
'politics',
'american',
'society',
'longman',
'classic',
'political',
'science'],
['political', 'science'],
36
['look', 'art', 'history'],
['film', 'critical', 'mycommunicationkit'],
['sociology'],
['human', 'sexuality'],
['world', 'mysearchlab', 'political', 'science'],
['cultural', 'anthropology', 'globalizing', 'world'],
['forensic', 'anthropology'],
['empirical', 'political', 'analysis', 'mysearchlab', 'political', 'science'],
['challenge', 'world', 'development', 'mysearchlab', 'political', 'science'],
['janson', 'history', 'art', 'western', 'tradition', 'myartslab', 'pearson'],
['social', 'psychology'],
['statistic', 'behavioral', 'social', 'science'],
['sociology', 'education'],
['conscious', 'reader'],
['genderspeak', 'personal', 'effectiveness', 'gender', 'communication'],
['political', 'science', 'student', 'writer', 'manual'],
['thinking', 'woman', 'sociological', 'perspective', 'sex', 'gender'],
['understanding', 'psychology'],
['understanding',
'political',
'world',
'comparative',
'political',
'science',
'mypoliscilab'],
['psychology', 'life'],
['art', 'history'],
['art', 'history', 'volume'],
['art', 'history', 'portable', 'book'],
['art', 'history', 'portable', 'book'],
['art', 'history', 'portable', 'book'],
['art', 'history', 'portable', 'book'],
['art', 'history', 'volume'],
['religion', 'world', 'myreligionlab'],
['art', 'beyond', 'west'],
['sociology', 'down-to-earth', 'mysoclab'],
['sociology', 'down-to-earth'],
['thinking', 'woman', 'sociological', 'perspective', 'sex', 'gender'],
['literature', 'writing', 'process'],
['exploring', 'biological', 'anthropology'],
['human', 'sexuality', 'world', 'diversity'],
['world', 'religion'],
['forty', 'study', 'changed', 'psychology'],
['human',
'evolution',
'culture',
'highlight',
'anthropology',
37
'myanthrolab',
'pearson'],
['music', 'sight', 'singing'],
['art', 'history', 'volume', 'book', 'carte'],
['art', 'history', 'volume', 'book', 'carte', 'myartslab'],
['abnormal', 'psychology'],
['art', 'history', 'volume', 'myartslab'],
['art', 'history', 'myartslab'],
['human', 'sexuality', 'world', 'diversity', 'paperback'],
['living', 'religion'],
['living', 'religion', 'myreligionlab', 'pearson'],
['mastering', 'world', 'psychology'],
['political', 'science'],
['political', 'science', 'mypoliscilab', 'pearson'],
['short', 'course', 'photography', 'film', 'darkroom'],
['sociology'],
['human', 'sexuality'],
['sociology', 'down-to-earth'],
['greek-english', 'lexicon', 'testament', 'christian', 'literature'],
['greek', 'tragedy'],
['prediction', 'profiling', 'policing', 'punishing', 'actuarial', 'age'],
['travesti',
'sex',
'gender',
'culture',
'brazilian',
'transgendered',
'prostitute'],
['cult', 'territory', 'origin', 'greek', 'city-state'],
['disability',
'judaism',
'christianity',
'islam',
'sacred',
'text',
'historical',
'tradition',
'social',
'analysis'],
['politics',
'passion',
'woman',
'sexual',
'culture',
'afro-surinamese',
'diaspora'],
['source', 'east', 'asian', 'tradition'],
['source', 'east', 'asian', 'tradition', 'modern', 'period'],
38
['source', 'east', 'asian', 'tradition'],
['jewishness', 'critique', 'zionism', 'direction', 'critical', 'theory'],
['islam', 'america'],
['voice', 'vision', 'creative', 'narrative', 'film', 'production'],
['carlos', 'aldama', 'life', 'bat', 'cuba', 'diaspora', 'drum'],
['cognitive',
'model',
'speech',
'processing',
'psycholinguistic',
'computational',
'perspective',
'acl-mit',
'press',
'natural',
'language',
'processing'],
['machine', 'learning', 'adaptive', 'computation', 'machine', 'learning'],
['process', 'life', 'molecular', 'biology'],
['probabilistic',
'graphical',
'model',
'principle',
'technique',
'adaptive',
'computation',
'machine',
'learning'],
['circuit', 'design', 'simulation', 'vhdl'],
['machine',
'learning',
'probabilistic',
'perspective',
'adaptive',
'computation',
'machine',
'learning'],
['foundation',
'machine',
'learning',
'adaptive',
'computation',
'machine',
'learning'],
['game', 'theory', 'social', 'contract', 'playing'],
['algorithm'],
['strategy', 'game', 'theory', 'practice'],
['digital',
39
'performance',
'history',
'medium',
'theater',
'dance',
'performance',
'art',
'installation'],
['game', 'theory'],
['bioinformatics', 'algorithm'],
['foundation', 'statistical', 'natural', 'language', 'processing'],
['learning',
'kernel',
'support',
'vector',
'machine',
'regularization',
'optimization',
'beyond'],
['linguistics', 'sixth', 'language', 'communication'],
['exile', 'diaspora', 'stranger'],
['e-business', 'e-commerce', 'management'],
['computer', 'networking', 'james', 'kurose', 'keith', 'ross'],
['computer',
'organization',
'architecture',
'designing',
'performance',
'william',
'stalling'],
['defeat', 'bad', 'news', 'rwanda', 'musinga', 'africa', 'diaspora'],
['theater', 'technology'],
['theater', 'design'],
['theater', 'avant-garde', 'critical', 'anthology'],
['visual',
'judaism',
'late',
'antiquity',
'historical',
'context',
'jewish',
'art'],
['anthology', 'arabic', 'literature', 'culture', 'pre-islamic', 'time'],
['learn', 'read', 'greek', 'textbook', 'workbook', 'set'],
['relativity',
'gravitation',
'hundred',
'birth',
40
'albert',
'einstein',
'volume'],
['principle', 'quantum', 'mechanic'],
['psychotherapy', 'buddhism', 'integration'],
['handbook', 'urban', 'community', 'forestry', 'northeast'],
['molecular', 'immunology', 'complex', 'carbohydrate'],
['encyclopedia', 'diaspora', 'immigrant', 'refugee', 'culture', 'world'],
['capo',
'music',
'writing',
'finest',
'writing',
'rock',
'pop',
'jazz',
'country'],
['faith',
'journalist',
'investigates',
'toughest',
'objection',
'christianity'],
['basic', 'biblical', 'greek', 'workbook'],
['basic', 'biblical', 'greek', 'grammar'],
['stage', 'drama', 'classical', 'contemporary', 'theater'],
['exploring', 'american', 'history', 'volume', 'survey', 'source'],
['critical', 'vision', 'film', 'theory'],
['anatomy', 'film'],
['film'],
['contemporary', 'linguistics'],
['world', 'global', 'history', 'source', 'volume'],
['world', 'global', 'history', 'source', 'volume'],
['nvestigative', 'reporter', 'handbook', 'document', 'database', 'technique'],
['contemporary', 'linguistics', 'study'],
['america', 'concise', 'history', 'volume'],
['america', 'concise', 'history', 'volume'],
['rule', 'writer', 'writing', 'literature', 'tabbed', 'version'],
['literature', 'matter', 'anthology', 'reader', 'writer'],
['american', 'promise', 'volume', 'history', 'united'],
['history', 'world', 'society', 'volume'],
['woman',
'eye',
'american',
'history',
'document',
'woman',
'eye',
41
'american',
'history',
'document',
'dubois',
'ellen',
'carol',
'author',
'paperback'],
['film', 'experience'],
['ashe',
'traditional',
'religion',
'healing',
'sub-saharan',
'africa',
'diaspora',
'classified',
'international',
'bibliography',
'bibliography',
'index',
'afro-american',
'african',
'study'],
['broadway', 'encyclopedia', 'theater', 'american', 'culture'],
['feminism', 'woman', 'worldwide', 'volume', 'volume', 'woman', 'psychology'],
['material', 'criminal', 'law', 'american', 'casebook'],
['murray',
'flechtner',
'sale',
'lease',
'electronic',
'commerce',
'material',
'national',
'international',
'transaction'],
['sex-based', 'discrimination', 'text', 'material', 'american', 'casebook'],
['real', 'analysis'],
['longman', 'anthology', 'drama', 'theater', 'global', 'perspective'],
['writing', 'political', 'science'],
['computer', 'networking', 'internet'],
...]
In [10]: all_tokens[0]  # display the first entry of all_tokens (a list of token strings, per Out[10])
Out[10]: ['philosophy', 'sex', 'love', 'reader']
In [11]: len(word_index_map)  # number of entries in the word -> index map
42
Out[11]: 2070
In [12]: word_index_map["philosophy"]  # index stored for the token "philosophy"
Out[12]: 0
In [13]: check = pd.DataFrame(X)  # wrap the matrix X in a DataFrame so it renders as a labelled table
check.iloc[:10,:80]  # preview only the first 10 rows and first 80 columns
Out[13]: 0 1 2 3 4 5 6 7 8 9 ... 70 71 72 73 \
0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
1 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 ... 0.0 0.0 0.0 0.0
2 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 ... 0.0 0.0 0.0 0.0
3 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
4 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
5 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 0.0
6 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 0.0
7 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 0.0
8 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
9 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0
74 75 76 77 78 79
0 0.0 0.0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 0.0 0.0
5 0.0 0.0 0.0 0.0 0.0 0.0
6 0.0 0.0 0.0 0.0 0.0 0.0
7 0.0 0.0 0.0 0.0 0.0 0.0
8 0.0 0.0 0.0 0.0 0.0 0.0
9 0.0 0.0 0.0 0.0 0.0 0.0
[10 rows x 80 columns]
# In [16]:
from __future__ import print_function, division
from builtins import range

# Reduce X to two components so every row can be drawn as a 2-D point.
# n_components=2 is scikit-learn's default; it is spelled out because the
# plotting code below indexes columns 0 and 1 of the result.
svd = TruncatedSVD(n_components=2)
Z = svd.fit_transform(X)

# Report the figure size currently configured, then enlarge it BEFORE any
# plotting call: rcParams["figure.figsize"] is only consulted when a figure
# is created, so changing it after plt.show() would not affect this plot.
fig_size = plt.rcParams["figure.figsize"]
# Prints [8.0, 6.0] on a fresh session with stock defaults
print("Current size:", fig_size)
# Set figure width to 40 and height to 20 so the text labels stay legible
plt.rcParams["figure.figsize"] = [40, 20]

plt.scatter(Z[:, 0], Z[:, 1])
# NOTE(review): D and index_word_map come from an earlier cell; this assumes
# D equals the number of rows of Z and that index_word_map[i] is the word
# for row i -- confirm against the cell that builds X.
for i in range(D):
    # The label is passed positionally: the keyword was `s` before
    # Matplotlib 3.3 and is `text` afterwards, so positional works on both.
    plt.annotate(index_word_map[i], xy=(Z[i, 0], Z[i, 1]))
plt.show()
Current size: [40.0, 20.0]
In [15]: pd.DataFrame(Z)  # tabulate Z, the output of svd.fit_transform(X), for inspection
Out[15]: 0 1
0 0.082863 0.090046
1 0.137366 0.204138
2 0.025287 0.007821
3 0.071670 0.038627
4 0.161135 0.077085
5 0.164317 0.362638
6 0.091680 0.183416
7 0.129482 0.205153
8 0.143341 -0.016797
9 0.706761 -0.218631
10 0.007544 0.010085
11 0.007544 0.010085
12 0.007544 0.010085
13 0.074527 0.003684
14 0.139557 0.049312
15 0.986955 1.702502
44
16 0.248316 0.007972
17 0.086606 0.014585
18 0.211094 0.393591
19 0.032749 -0.006602
20 0.051950 -0.006406
21 0.348230 -0.037855
22 0.019748 -0.003450
23 0.012128 0.012000
24 0.873528 -0.231210
25 0.505580 0.645919
26 0.012128 0.012000
27 0.003020 0.004104
28 0.178211 0.297224
29 0.001877 0.002365
... ... ...
2040 0.005699 0.006780
2041 0.004367 0.002224
2042 0.004367 0.002224
2043 0.004367 0.002224
2044 0.004367 0.002224
2045 0.003226 0.004005
2046 0.004105 0.003348
2047 0.004105 0.003348
2048 0.017645 0.038801
2049 0.004105 0.003348
2050 0.004951 -0.001813
2051 0.004951 -0.001813
2052 0.004951 -0.001813
2053 0.004951 -0.001813
2054 0.004951 -0.001813
2055 0.013540 0.035453
2056 0.013540 0.035453
2057 0.013540 0.035453
2058 0.013540 0.035453
2059 0.013540 0.035453
2060 0.013540 0.035453
2061 0.013540 0.035453
2062 0.013540 0.035453
2063 0.011516 0.009690
2064 0.007192 0.005615
2065 0.011344 -0.001988
2066 0.010791 -0.000857
2067 0.010791 -0.000857
2068 0.010791 -0.000857
2069 0.010791 -0.000857
[2070 rows x 2 columns]
45