# -*- coding: utf-8 -*-
"""Small regex-based helpers for splitting text into words and sentences."""
import re

# Compiled once at import time; the pattern text is what defines behavior.
# A "word" is a run of word characters, hyphens and apostrophes that starts
# and ends on a word boundary (so "fgd-y" and "i'am" are single words).
_WORD_RE = re.compile(r'\b[-\'\w]+\b')
# Same word pattern, plus any single non-word/non-space character, plus "\n".
_WORD_OR_PUNCT_RE = re.compile(r'\b[-\'\w]+\b|[^\w\s]|\n')
# Zero-width split point immediately after a sentence-ending character.
_SENTENCE_END_RE = re.compile(r'(?<=[.!?;])')
_ASCII_LETTER_RE = re.compile(r'[a-zA-Z]')
_ASCII_LETTERS_ONLY_RE = re.compile(r'[a-zA-Z]+')


def split_text_to_word(text: str) -> list:
    """Return the list of words found in *text*, in order of appearance.

    Punctuation and whitespace are dropped; hyphens and apostrophes inside
    a word are kept as part of that word.
    """
    return _WORD_RE.findall(text)


def get_article_words_count(text: str) -> int:
    """Return the number of words in *text* (see ``split_text_to_word``)."""
    return len(split_text_to_word(text))


def split_text_to_sentences(text: str) -> list:
    """Split *text* after every ``.``, ``!``, ``?`` or ``;``.

    The terminating character stays attached to its sentence and leading
    spaces are preserved. Fragments that are empty or consist only of
    space characters are discarded.
    """
    pieces = _SENTENCE_END_RE.split(text)
    # Only plain spaces are ignored when deciding whether a piece is empty;
    # a piece holding e.g. a newline is kept.
    return [piece for piece in pieces if piece.replace(" ", "")]


def split_text_to_word_punctuation(text: str) -> list:
    """Return words, punctuation marks and newlines of *text* as tokens.

    Whitespace other than ``"\\n"`` is dropped; each punctuation character
    becomes its own token.
    """
    return _WORD_OR_PUNCT_RE.findall(text)


def is_word(single_word: str, strict: bool = False) -> bool:
    """Tell whether *single_word* counts as a word.

    Surrounding whitespace is stripped before checking.

    Strict mode (off by default): every character must be an ASCII letter,
    so only all-letter strings count as words.
    Non-strict mode: a single ASCII letter anywhere is enough, so even
    ``"op123"`` counts as a word.

    An empty (or whitespace-only) string is never a word in either mode.
    """
    single_word = single_word.strip()
    if strict:
        # fullmatch with "+" rejects the empty string, which a per-character
        # all(...) check would wrongly accept.
        return _ASCII_LETTERS_ONLY_RE.fullmatch(single_word) is not None
    return _ASCII_LETTER_RE.search(single_word) is not None


if __name__ == '__main__':
    # Sample text kept around for manual experiments with the helpers above.
    a = "fdh fgdhf  fgd-y i'am a student.gfddfgfd dfhgfd ! fdgh,fdgh fght. 3.1415"
    print(is_word("student34", strict=True))