""" A. Input Get 50% off on every purchase. contact marketing team at market@qq.com. Find all your linkedin contact...
| """ | |
| A. Input | |
| Get 50% off on every purchase. contact marketing team at market@qq.com. Find all your linkedin | |
| contacts for free, jeff.peterson@b2bsearch.com. qq.com partnership program apply at | |
| market@qq.com | |
| B. Expected Output | |
| { "market@qq.com" : {"Occurance":2, "EmailType": "Non-Human"} , | |
| "jeff.peterson@b2bsearch.com" : {"Occurance":1, "EmailType": "Human"} | |
| } | |
| C. Explanation: | |
| The output must be in a nested json format. | |
| "Occurance" : No of times the email is repeated in the text. | |
| "EmailType" : Type of the email. You can have more complex logic to identify human and non- | |
| human emails but in this exercise, Just try the logic given below: | |
| Finding human emails: If the email is of format firstname.lastname@email.com then you can | |
| assume that the email is human. | |
| Finding non-human emails: If the email format is text@email.com where text is less than 8 | |
| characters, then you can assume that the email is likely to be non-human. | |
| Note: Get text file from here ' ' | |
| """ | |
import json
import re
from collections import Counter
def filterEmail(data):
    """Extract e-mail-like tokens from *data* by splitting on whitespace.

    A token is kept when it contains ``@`` and is longer than 7 characters.
    Punctuation clinging to a kept token (for example the full stop in
    ``"market@qq.com."``) is stripped, so the same address always produces
    the same string and is counted once per real occurrence. The resulting
    list is passed to ``jsonifyOutput``.

    Args:
        data: Raw text to scan.
    """
    # Sentence punctuation and brackets that may surround an address in prose.
    surrounding = ".,;:!?\"'()[]{}<>"
    # split() without arguments breaks on any whitespace run (spaces, tabs,
    # newlines) and never yields empty tokens.
    emailList = [
        token.strip(surrounding)
        for token in data.split()
        if "@" in token and len(token) > 7
    ]
    jsonifyOutput(emailList)
def filterEmailWithRegex(data):
    """Extract e-mail-like tokens from *data* with a regular expression.

    Every run of non-whitespace characters that has an ``@`` with at least
    one character on each side is taken as a candidate. Punctuation clinging
    to a candidate (for example the full stop in ``"market@qq.com."``) is
    stripped so identical addresses compare equal. The resulting list is
    passed to ``jsonifyOutput``.

    Args:
        data: Raw text to scan.
    """
    # Sentence punctuation and brackets that may surround an address in prose.
    surrounding = ".,;:!?\"'()[]{}<>"
    # Raw string: "\S" in a normal string literal is an invalid escape.
    candidates = re.findall(r"\S+@\S+", data)
    emailList = [candidate.strip(surrounding) for candidate in candidates]
    jsonifyOutput(emailList)
def jsonifyOutput(emailList):
    """Summarise extracted addresses and export the summary as JSON.

    Builds a nested dictionary keyed by e-mail address. Each value holds
    ``"Occurance"`` (how many times the address appears in *emailList*) and
    ``"EmailType"``, decided from the text before the first ``@``:

    * ``"Human"`` if that text contains a ``.``,
    * ``"Non-Human"`` if it has no ``.`` and is shorter than 8 characters,
    * ``"Null"`` otherwise.

    The dictionary is handed to ``exportJsonResult`` and also returned.

    Args:
        emailList: Iterable of e-mail address strings; duplicates are counted.

    Returns:
        The nested ``{address: {"Occurance": int, "EmailType": str}}`` dict,
        with addresses in order of first appearance.
    """
    mainDict = {}
    # Counter tallies in a single pass and keeps first-seen order, so the
    # exported JSON is stable between runs (iterating a set of strings is not).
    for email, occurance in Counter(emailList).items():
        local_part = email.split("@")[0]
        if "." in local_part:
            email_type = "Human"
        elif len(local_part) < 8:
            email_type = "Non-Human"
        else:
            email_type = "Null"
        mainDict[email] = {"Occurance": occurance, "EmailType": email_type}
    exportJsonResult(mainDict)
    return mainDict
def exportJsonResult(outputResult):
    """Write *outputResult* as JSON to ``result.json`` in the working directory.

    An existing ``result.json`` is overwritten.

    Args:
        outputResult: JSON-serialisable object to write.
    """
    # Explicit encoding so the written file does not depend on the platform
    # locale.
    with open("result.json", "w", encoding="utf-8") as resultFile:
        json.dump(outputResult, resultFile)
def readFile(path="websiteData.txt", *, use_regex=False):
    """Read the input text file and run e-mail extraction on its contents.

    Args:
        path: File to read; defaults to the exercise input
            ``websiteData.txt``.
        use_regex: When true, extract with ``filterEmailWithRegex`` instead
            of the default split-based ``filterEmail``.

    Raises:
        OSError: If *path* cannot be opened.
    """
    # Decode explicitly instead of relying on the platform locale. Undecodable
    # bytes are replaced rather than aborting the run; this assumes the
    # addresses of interest are plain ASCII.
    with open(path, "r", encoding="utf-8", errors="replace") as file:
        text_data = file.read()
    if use_regex:
        filterEmailWithRegex(text_data)
    else:
        filterEmail(text_data)
if __name__ == "__main__":
    # Run the extraction on the default input file when executed as a script.
    readFile()
# COMMENTS