Use snake_case() and associated tests.

This commit is contained in:
Marina Samuel 2020-06-03 12:54:35 -04:00
Родитель 5cbdfa0dcc
Коммит 99ae0c5646
7 изменённых файлов: 576 добавлений и 9 удалений

Просмотреть файл

@ -0,0 +1,24 @@
"""Generic utility functions."""
import re
# Word-boundary detector meant to be applied to *reversed* text. Python's
# ``re`` only accepts fixed-width lookbehinds, so the optional digit runs
# (``\d*``) are placed in lookaheads, which is presumably why the input is
# reversed before splitting.
REV_WORD_BOUND_PAT = re.compile(
    r"""
\b # standard word boundary
|(?<=[a-z][A-Z])(?=\d*[A-Z]) # A7Aa -> A7|Aa boundary
|(?<=[a-z][A-Z])(?=\d*[a-z]) # a7Aa -> a7|Aa boundary
|(?<=[A-Z])(?=\d*[a-z]) # a7A -> a7|A boundary
""",
    re.VERBOSE,
)


def snake_case(line: str) -> str:
    """Return ``line`` rewritten as a lower-case, underscore-separated name.

    The text is reversed, every non-word character and every underscore is
    turned into a space, and the result is split at the boundaries matched by
    ``REV_WORD_BOUND_PAT``. Blank pieces are discarded, the remaining pieces
    are lower-cased and joined with ``_``, and the whole string is reversed
    back into reading order.
    """
    # Separators (punctuation and "_") become blanks in the mirrored text.
    mirrored = re.sub(r"[^\w]|_", " ", line[::-1])

    # Keep only the pieces that carry actual word characters.
    pieces = []
    for chunk in REV_WORD_BOUND_PAT.split(mirrored):
        if chunk.strip():
            pieces.append(chunk.lower())

    # Join while still mirrored, then flip back to the original direction.
    return "_".join(pieces)[::-1]

Просмотреть файл

@ -12,6 +12,7 @@ from time import sleep
sys.path.append(str(Path(__file__).parent.parent.parent.resolve()))
from bigquery_etl.format_sql.formatter import reformat
from bigquery_etl.util.common import snake_case
PROBE_INFO_SERVICE = (
@ -447,7 +448,7 @@ def get_scalar_probes(scalar_type):
data = json.loads(gzip.decompress(url.read()).decode())
scalar_probes = set(
[
x.replace("scalar/", "").replace(".", "_")
snake_case(x.replace("scalar/", ""))
for x in data.keys()
if x.startswith("scalar/")
]

Просмотреть файл

@ -0,0 +1,27 @@
AAA,aaa
AAa,a_aa
AA7,aa7
AaA,aa_a
Aaa,aaa
Aa7,aa7
A7A,a7a
A7a,a7a
A77,a77
aAA,a_aa
aAa,a_aa
aA7,a_a7
aaA,aa_a
aaa,aaa
aa7,aa7
a7A,a7_a
a7a,a7a
a77,a77
7AA,7aa
7Aa,7aa
7A7,7a7
7aA,7a_a
7aa,7aa
7a7,7a7
77A,77a
77a,77a
777,777
1 AAA aaa
2 AAa a_aa
3 AA7 aa7
4 AaA aa_a
5 Aaa aaa
6 Aa7 aa7
7 A7A a7a
8 A7a a7a
9 A77 a77
10 aAA a_aa
11 aAa a_aa
12 aA7 a_a7
13 aaA aa_a
14 aaa aaa
15 aa7 aa7
16 a7A a7_a
17 a7a a7a
18 a77 a77
19 7AA 7aa
20 7Aa 7aa
21 7A7 7a7
22 7aA 7a_a
23 7aa 7aa
24 7a7 7a7
25 77A 77a
26 77a 77a
27 777 777

Просмотреть файл

@ -0,0 +1,232 @@
AvailablePageFile,available_page_file
AvailablePhysicalMemory,available_physical_memory
AvailableVirtualMemory,available_virtual_memory
BuildID,build_id
D2DEnabled,d2d_enabled
DWriteEnabled,d_write_enabled
GPUActive,gpu_active
Headless,headless
IsGarbageCollecting,is_garbage_collecting
LowEndMachine,low_end_machine
ProductID,product_id
ProductName,product_name
RAM,ram
ReleaseChannel,release_channel
SecondsSinceLastCrash,seconds_since_last_crash
StartupCrash,startup_crash
SystemMemoryUsePercentage,system_memory_use_percentage
TotalPageFile,total_page_file
TotalPhysicalMemory,total_physical_memory
TotalVirtualMemory,total_virtual_memory
Version,version
acceptLanguages,accept_languages
accessibilityServices,accessibility_services
activeAddons,active_addons
activeExperiment,active_experiment
activeGMPlugins,active_gm_plugins
activePlugins,active_plugins
adHocTablesDir,ad_hoc_tables_dir
additionalProperties,additional_properties
addonCompatibilityCheckEnabled,addon_compatibility_check_enabled
addonId,addon_id
addonVersion,addon_version
advancedLayers,advanced_layers
allowAutoplay,allow_autoplay
apiCall,api_call
apiVersion,api_version
appDisabled,app_disabled
appLocales,app_locales
appName,app_name
appUpdateChannel,app_update_channel
appVersion,app_version
appleModelId,apple_model_id
applicationId,application_id
applicationName,application_name
architecturesInBinary,architectures_in_binary
autoDownload,auto_download
availableLocales,available_locales
baseAddress,base_address
blocklistEnabled,blocklist_enabled
buildId,build_id
certSubject,cert_subject
changedFiles,changed_files
changesetID,changeset_id
clientId,client_id
closedTS,closed_ts
connType,conn_type
crashDate,crash_date
createdDate,created_date
createdTimestamp,created_timestamp
creationDate,creation_date
debugID,debug_id
debugName,debug_name
defaultBrowser,default_browser
defaultSearch,default_search
defaultSearchEngine,default_search_engine
defaultSearchEngineData,default_search_engine_data
description,description
detectedUri,detected_uri
detectedVersion,detected_version
deviceID,device_id
displayVersion,display_version
distributionId,distribution_id
distributionVersion,distribution_version
distributorChannel,distributor_channel
driver,driver
driverDate,driver_date
driverVersion,driver_version
e10sCohort,e10s_cohort
e10sEnabled,e10s_enabled
ecosystemClientId,ecosystem_client_id
effectiveContentProcessLevel,effective_content_process_level
encryptedData,encrypted_data
encryptionKeyId,encryption_key_id
engagedTS,engaged_ts
engagementType,engagement_type
errorModules,error_modules
eventId,event_id
expiredTS,expired_ts
fileSize,file_size
fileVersion,file_version
firstUseDate,first_use_date
firstView,first_view
flashUsage,flash_usage
flowId,flow_id
globalSettings,global_settings
gpuProcess,gpu_process
hasBinaryComponents,has_binary_components
hasCrashEnvironment,has_crash_environment
hasSync,has_sync
hotfixVersion,hotfix_version
installDay,install_day
installYear,install_year
ipc_channel_error,ipc_channel_error
isDefaultBrowser,is_default_browser
isStartup,is_startup
isStubProfile,is_stub_profile
isSystem,is_system
isTablet,is_tablet
isWow64,is_wow64
kernelVersion,kernel_version
keyedHistograms,keyed_histograms
l2cacheKB,l2cache_kb
l3cacheKB,l3cache_kb
landingSystem,landing_system
lastBuildId,last_build_id
lastVersion,last_version
launcherProcessState,launcher_process_state
learnMoreTS,learn_more_ts
loadDurationMS,load_duration_ms
loadPath,load_path
loaderName,loader_name
lostEventsCount,lost_events_count
memoryMB,memory_mb
mimeTypes,mime_types
moduleName,module_name
moduleTrustFlags,module_trust_flags
offeredTS,offered_ts
osName,os_name
osVersion,os_version
packetVersion,packet_version
pageId,page_id
pageSpecific,page_specific
partnerId,partner_id
partnerNames,partner_names
pingDiscardedForSize,ping_discarded_for_size
pioneerAddonMetadata,pioneer_addon_metadata
pioneerId,pioneer_id
pioneerUtilsVersion,pioneer_utils_version
placesBookmarksCount,places_bookmarks_count
placesPagesCount,places_pages_count
platformVersion,platform_version
pocketId,pocket_id
previousBuildId,previous_build_id
previousChannel,previous_channel
previousVersion,previous_version
prioData,prio_data
processStartTimestamp,process_start_timestamp
processType,process_type
processUptimeMS,process_uptime_ms
profileCreationDate,profile_creation_date
profileDate,profile_date
profileSubsessionCounter,profile_subsession_counter
promptResponse,prompt_response
pseudoDisplay,pseudo_display
pushDate,push_date
refreshRate,refresh_rate
regionalPrefsLocales,regional_prefs_locales
rememberCheckbox,remember_checkbox
requestedLocales,requested_locales
resetDate,reset_date
responseTime,response_time
reviewSystemUsed,review_system_used
runId,run_id
schemaName,schema_name
schemaVersion,schema_version
screenHeight,screen_height
screenWidth,screen_width
searchCohort,search_cohort
searchCounts,search_counts
sendFailure,send_failure
servicePackMajor,service_pack_major
servicePackMinor,service_pack_minor
sessionId,session_id
sessionState,session_state
settingsChanged,settings_changed
showTrackerStatsShare,show_tracker_stats_share
signedState,signed_state
sourcesJson,sources_json
spbeMaxConcurrentTabCount,spbe_max_concurrent_tab_count
spbeMaxConcurrentWindowCount,spbe_max_concurrent_window_count
spbeNavigationAboutNewtab,spbe_navigation_about_newtab
spbeNavigationContextmenu,spbe_navigation_contextmenu
spbeNavigationSearchbar,spbe_navigation_searchbar
spbeNavigationUrlbar,spbe_navigation_urlbar
spbeTabOpenEventCount,spbe_tab_open_event_count
spbeTotalUriCount,spbe_total_uri_count
spbeUnfilteredUriCount,spbe_unfiltered_uri_count
spbeUniqueDomainsCount,spbe_unique_domains_count
spbeWindowOpenEventCount,spbe_window_open_event_count
speedMHz,speed_m_hz
sqlTableName,sql_table_name
standardDeviation,standard_deviation
structVersion,struct_version
studyName,study_name
submissionURL,submission_url
subsessionId,subsession_id
subsessionLength,subsession_length
subsessionStartDate,subsession_start_date
subsysID,subsys_id
surveyId,survey_id
surveyVersion,survey_version
systemCpuCores,system_cpu_cores
systemCpuSpeedMhz,system_cpu_speed_mhz
systemGfxMonitors1ScreenWidth,system_gfx_monitors1_screen_width
systemGfxMonitors1ScreenWidthZeroIndexed,system_gfx_monitors1_screen_width_zero_indexed
systemLocales,system_locales
systemMemoryMb,system_memory_mb
tableName,table_name
targetBuildId,target_build_id
targetChannel,target_channel
targetDisplayVersion,target_display_version
targetVersion,target_version
telemetryEnabled,telemetry_enabled
textureSharing,texture_sharing
threadID,thread_id
threadName,thread_name
timezoneOffest,timezone_offest
totalBlockedAudibleMedia,total_blocked_audible_media
totalPages,total_pages
totalPagesAM,total_pages_am
totalTime,total_time
updateDay,update_day
updaterAvailable,updater_available
userDisabled,user_disabled
vendorID,vendor_id
virtualMaxMB,virtual_max_mb
votedTS,voted_ts
windowClosedTS,window_closed_ts
windowsBuildNumber,windows_build_number
windowsUBR,windows_ubr
xpcomAbi,xpcom_abi
xulLoadDurationMS,xul_load_duration_ms
1 AvailablePageFile available_page_file
2 AvailablePhysicalMemory available_physical_memory
3 AvailableVirtualMemory available_virtual_memory
4 BuildID build_id
5 D2DEnabled d2d_enabled
6 DWriteEnabled d_write_enabled
7 GPUActive gpu_active
8 Headless headless
9 IsGarbageCollecting is_garbage_collecting
10 LowEndMachine low_end_machine
11 ProductID product_id
12 ProductName product_name
13 RAM ram
14 ReleaseChannel release_channel
15 SecondsSinceLastCrash seconds_since_last_crash
16 StartupCrash startup_crash
17 SystemMemoryUsePercentage system_memory_use_percentage
18 TotalPageFile total_page_file
19 TotalPhysicalMemory total_physical_memory
20 TotalVirtualMemory total_virtual_memory
21 Version version
22 acceptLanguages accept_languages
23 accessibilityServices accessibility_services
24 activeAddons active_addons
25 activeExperiment active_experiment
26 activeGMPlugins active_gm_plugins
27 activePlugins active_plugins
28 adHocTablesDir ad_hoc_tables_dir
29 additionalProperties additional_properties
30 addonCompatibilityCheckEnabled addon_compatibility_check_enabled
31 addonId addon_id
32 addonVersion addon_version
33 advancedLayers advanced_layers
34 allowAutoplay allow_autoplay
35 apiCall api_call
36 apiVersion api_version
37 appDisabled app_disabled
38 appLocales app_locales
39 appName app_name
40 appUpdateChannel app_update_channel
41 appVersion app_version
42 appleModelId apple_model_id
43 applicationId application_id
44 applicationName application_name
45 architecturesInBinary architectures_in_binary
46 autoDownload auto_download
47 availableLocales available_locales
48 baseAddress base_address
49 blocklistEnabled blocklist_enabled
50 buildId build_id
51 certSubject cert_subject
52 changedFiles changed_files
53 changesetID changeset_id
54 clientId client_id
55 closedTS closed_ts
56 connType conn_type
57 crashDate crash_date
58 createdDate created_date
59 createdTimestamp created_timestamp
60 creationDate creation_date
61 debugID debug_id
62 debugName debug_name
63 defaultBrowser default_browser
64 defaultSearch default_search
65 defaultSearchEngine default_search_engine
66 defaultSearchEngineData default_search_engine_data
67 description description
68 detectedUri detected_uri
69 detectedVersion detected_version
70 deviceID device_id
71 displayVersion display_version
72 distributionId distribution_id
73 distributionVersion distribution_version
74 distributorChannel distributor_channel
75 driver driver
76 driverDate driver_date
77 driverVersion driver_version
78 e10sCohort e10s_cohort
79 e10sEnabled e10s_enabled
80 ecosystemClientId ecosystem_client_id
81 effectiveContentProcessLevel effective_content_process_level
82 encryptedData encrypted_data
83 encryptionKeyId encryption_key_id
84 engagedTS engaged_ts
85 engagementType engagement_type
86 errorModules error_modules
87 eventId event_id
88 expiredTS expired_ts
89 fileSize file_size
90 fileVersion file_version
91 firstUseDate first_use_date
92 firstView first_view
93 flashUsage flash_usage
94 flowId flow_id
95 globalSettings global_settings
96 gpuProcess gpu_process
97 hasBinaryComponents has_binary_components
98 hasCrashEnvironment has_crash_environment
99 hasSync has_sync
100 hotfixVersion hotfix_version
101 installDay install_day
102 installYear install_year
103 ipc_channel_error ipc_channel_error
104 isDefaultBrowser is_default_browser
105 isStartup is_startup
106 isStubProfile is_stub_profile
107 isSystem is_system
108 isTablet is_tablet
109 isWow64 is_wow64
110 kernelVersion kernel_version
111 keyedHistograms keyed_histograms
112 l2cacheKB l2cache_kb
113 l3cacheKB l3cache_kb
114 landingSystem landing_system
115 lastBuildId last_build_id
116 lastVersion last_version
117 launcherProcessState launcher_process_state
118 learnMoreTS learn_more_ts
119 loadDurationMS load_duration_ms
120 loadPath load_path
121 loaderName loader_name
122 lostEventsCount lost_events_count
123 memoryMB memory_mb
124 mimeTypes mime_types
125 moduleName module_name
126 moduleTrustFlags module_trust_flags
127 offeredTS offered_ts
128 osName os_name
129 osVersion os_version
130 packetVersion packet_version
131 pageId page_id
132 pageSpecific page_specific
133 partnerId partner_id
134 partnerNames partner_names
135 pingDiscardedForSize ping_discarded_for_size
136 pioneerAddonMetadata pioneer_addon_metadata
137 pioneerId pioneer_id
138 pioneerUtilsVersion pioneer_utils_version
139 placesBookmarksCount places_bookmarks_count
140 placesPagesCount places_pages_count
141 platformVersion platform_version
142 pocketId pocket_id
143 previousBuildId previous_build_id
144 previousChannel previous_channel
145 previousVersion previous_version
146 prioData prio_data
147 processStartTimestamp process_start_timestamp
148 processType process_type
149 processUptimeMS process_uptime_ms
150 profileCreationDate profile_creation_date
151 profileDate profile_date
152 profileSubsessionCounter profile_subsession_counter
153 promptResponse prompt_response
154 pseudoDisplay pseudo_display
155 pushDate push_date
156 refreshRate refresh_rate
157 regionalPrefsLocales regional_prefs_locales
158 rememberCheckbox remember_checkbox
159 requestedLocales requested_locales
160 resetDate reset_date
161 responseTime response_time
162 reviewSystemUsed review_system_used
163 runId run_id
164 schemaName schema_name
165 schemaVersion schema_version
166 screenHeight screen_height
167 screenWidth screen_width
168 searchCohort search_cohort
169 searchCounts search_counts
170 sendFailure send_failure
171 servicePackMajor service_pack_major
172 servicePackMinor service_pack_minor
173 sessionId session_id
174 sessionState session_state
175 settingsChanged settings_changed
176 showTrackerStatsShare show_tracker_stats_share
177 signedState signed_state
178 sourcesJson sources_json
179 spbeMaxConcurrentTabCount spbe_max_concurrent_tab_count
180 spbeMaxConcurrentWindowCount spbe_max_concurrent_window_count
181 spbeNavigationAboutNewtab spbe_navigation_about_newtab
182 spbeNavigationContextmenu spbe_navigation_contextmenu
183 spbeNavigationSearchbar spbe_navigation_searchbar
184 spbeNavigationUrlbar spbe_navigation_urlbar
185 spbeTabOpenEventCount spbe_tab_open_event_count
186 spbeTotalUriCount spbe_total_uri_count
187 spbeUnfilteredUriCount spbe_unfiltered_uri_count
188 spbeUniqueDomainsCount spbe_unique_domains_count
189 spbeWindowOpenEventCount spbe_window_open_event_count
190 speedMHz speed_m_hz
191 sqlTableName sql_table_name
192 standardDeviation standard_deviation
193 structVersion struct_version
194 studyName study_name
195 submissionURL submission_url
196 subsessionId subsession_id
197 subsessionLength subsession_length
198 subsessionStartDate subsession_start_date
199 subsysID subsys_id
200 surveyId survey_id
201 surveyVersion survey_version
202 systemCpuCores system_cpu_cores
203 systemCpuSpeedMhz system_cpu_speed_mhz
204 systemGfxMonitors1ScreenWidth system_gfx_monitors1_screen_width
205 systemGfxMonitors1ScreenWidthZeroIndexed system_gfx_monitors1_screen_width_zero_indexed
206 systemLocales system_locales
207 systemMemoryMb system_memory_mb
208 tableName table_name
209 targetBuildId target_build_id
210 targetChannel target_channel
211 targetDisplayVersion target_display_version
212 targetVersion target_version
213 telemetryEnabled telemetry_enabled
214 textureSharing texture_sharing
215 threadID thread_id
216 threadName thread_name
217 timezoneOffest timezone_offest
218 totalBlockedAudibleMedia total_blocked_audible_media
219 totalPages total_pages
220 totalPagesAM total_pages_am
221 totalTime total_time
222 updateDay update_day
223 updaterAvailable updater_available
224 userDisabled user_disabled
225 vendorID vendor_id
226 virtualMaxMB virtual_max_mb
227 votedTS voted_ts
228 windowClosedTS window_closed_ts
229 windowsBuildNumber windows_build_number
230 windowsUBR windows_ubr
231 xpcomAbi xpcom_abi
232 xulLoadDurationMS xul_load_duration_ms

Просмотреть файл

@ -0,0 +1,256 @@
AAAA,aaaa
AAAa,aa_aa
AAA7,aaa7
AAA_,aaa
AAaA,a_aa_a
AAaa,a_aaa
AAa7,a_aa7
AAa_,a_aa
AA7A,aa7a
AA7a,aa7a
AA77,aa77
AA7_,aa7
AA_A,aa_a
AA_a,aa_a
AA_7,aa_7
AA__,aa
AaAA,aa_aa
AaAa,aa_aa
AaA7,aa_a7
AaA_,aa_a
AaaA,aaa_a
Aaaa,aaaa
Aaa7,aaa7
Aaa_,aaa
Aa7A,aa7_a
Aa7a,aa7a
Aa77,aa77
Aa7_,aa7
Aa_A,aa_a
Aa_a,aa_a
Aa_7,aa_7
Aa__,aa
A7AA,a7aa
A7Aa,a7_aa
A7A7,a7a7
A7A_,a7a
A7aA,a7a_a
A7aa,a7aa
A7a7,a7a7
A7a_,a7a
A77A,a77a
A77a,a77a
A777,a777
A77_,a77
A7_A,a7_a
A7_a,a7_a
A7_7,a7_7
A7__,a7
A_AA,a_aa
A_Aa,a_aa
A_A7,a_a7
A_A_,a_a
A_aA,a_a_a
A_aa,a_aa
A_a7,a_a7
A_a_,a_a
A_7A,a_7a
A_7a,a_7a
A_77,a_77
A_7_,a_7
A__A,a_a
A__a,a_a
A__7,a_7
A___,a
aAAA,a_aaa
aAAa,a_a_aa
aAA7,a_aa7
aAA_,a_aa
aAaA,a_aa_a
aAaa,a_aaa
aAa7,a_aa7
aAa_,a_aa
aA7A,a_a7a
aA7a,a_a7a
aA77,a_a77
aA7_,a_a7
aA_A,a_a_a
aA_a,a_a_a
aA_7,a_a_7
aA__,a_a
aaAA,aa_aa
aaAa,aa_aa
aaA7,aa_a7
aaA_,aa_a
aaaA,aaa_a
aaaa,aaaa
aaa7,aaa7
aaa_,aaa
aa7A,aa7_a
aa7a,aa7a
aa77,aa77
aa7_,aa7
aa_A,aa_a
aa_a,aa_a
aa_7,aa_7
aa__,aa
a7AA,a7_aa
a7Aa,a7_aa
a7A7,a7_a7
a7A_,a7_a
a7aA,a7a_a
a7aa,a7aa
a7a7,a7a7
a7a_,a7a
a77A,a77_a
a77a,a77a
a777,a777
a77_,a77
a7_A,a7_a
a7_a,a7_a
a7_7,a7_7
a7__,a7
a_AA,a_aa
a_Aa,a_aa
a_A7,a_a7
a_A_,a_a
a_aA,a_a_a
a_aa,a_aa
a_a7,a_a7
a_a_,a_a
a_7A,a_7a
a_7a,a_7a
a_77,a_77
a_7_,a_7
a__A,a_a
a__a,a_a
a__7,a_7
a___,a
7AAA,7aaa
7AAa,7a_aa
7AA7,7aa7
7AA_,7aa
7AaA,7aa_a
7Aaa,7aaa
7Aa7,7aa7
7Aa_,7aa
7A7A,7a7a
7A7a,7a7a
7A77,7a77
7A7_,7a7
7A_A,7a_a
7A_a,7a_a
7A_7,7a_7
7A__,7a
7aAA,7a_aa
7aAa,7a_aa
7aA7,7a_a7
7aA_,7a_a
7aaA,7aa_a
7aaa,7aaa
7aa7,7aa7
7aa_,7aa
7a7A,7a7_a
7a7a,7a7a
7a77,7a77
7a7_,7a7
7a_A,7a_a
7a_a,7a_a
7a_7,7a_7
7a__,7a
77AA,77aa
77Aa,77aa
77A7,77a7
77A_,77a
77aA,77a_a
77aa,77aa
77a7,77a7
77a_,77a
777A,777a
777a,777a
7777,7777
777_,777
77_A,77_a
77_a,77_a
77_7,77_7
77__,77
7_AA,7_aa
7_Aa,7_aa
7_A7,7_a7
7_A_,7_a
7_aA,7_a_a
7_aa,7_aa
7_a7,7_a7
7_a_,7_a
7_7A,7_7a
7_7a,7_7a
7_77,7_77
7_7_,7_7
7__A,7_a
7__a,7_a
7__7,7_7
7___,7
_AAA,aaa
_AAa,a_aa
_AA7,aa7
_AA_,aa
_AaA,aa_a
_Aaa,aaa
_Aa7,aa7
_Aa_,aa
_A7A,a7a
_A7a,a7a
_A77,a77
_A7_,a7
_A_A,a_a
_A_a,a_a
_A_7,a_7
_A__,a
_aAA,a_aa
_aAa,a_aa
_aA7,a_a7
_aA_,a_a
_aaA,aa_a
_aaa,aaa
_aa7,aa7
_aa_,aa
_a7A,a7_a
_a7a,a7a
_a77,a77
_a7_,a7
_a_A,a_a
_a_a,a_a
_a_7,a_7
_a__,a
_7AA,7aa
_7Aa,7aa
_7A7,7a7
_7A_,7a
_7aA,7a_a
_7aa,7aa
_7a7,7a7
_7a_,7a
_77A,77a
_77a,77a
_777,777
_77_,77
_7_A,7_a
_7_a,7_a
_7_7,7_7
_7__,7
__AA,aa
__Aa,aa
__A7,a7
__A_,a
__aA,a_a
__aa,aa
__a7,a7
__a_,a
__7A,7a
__7a,7a
__77,77
__7_,7
___A,a
___a,a
___7,7
____,
1 AAAA aaaa
2 AAAa aa_aa
3 AAA7 aaa7
4 AAA_ aaa
5 AAaA a_aa_a
6 AAaa a_aaa
7 AAa7 a_aa7
8 AAa_ a_aa
9 AA7A aa7a
10 AA7a aa7a
11 AA77 aa77
12 AA7_ aa7
13 AA_A aa_a
14 AA_a aa_a
15 AA_7 aa_7
16 AA__ aa
17 AaAA aa_aa
18 AaAa aa_aa
19 AaA7 aa_a7
20 AaA_ aa_a
21 AaaA aaa_a
22 Aaaa aaaa
23 Aaa7 aaa7
24 Aaa_ aaa
25 Aa7A aa7_a
26 Aa7a aa7a
27 Aa77 aa77
28 Aa7_ aa7
29 Aa_A aa_a
30 Aa_a aa_a
31 Aa_7 aa_7
32 Aa__ aa
33 A7AA a7aa
34 A7Aa a7_aa
35 A7A7 a7a7
36 A7A_ a7a
37 A7aA a7a_a
38 A7aa a7aa
39 A7a7 a7a7
40 A7a_ a7a
41 A77A a77a
42 A77a a77a
43 A777 a777
44 A77_ a77
45 A7_A a7_a
46 A7_a a7_a
47 A7_7 a7_7
48 A7__ a7
49 A_AA a_aa
50 A_Aa a_aa
51 A_A7 a_a7
52 A_A_ a_a
53 A_aA a_a_a
54 A_aa a_aa
55 A_a7 a_a7
56 A_a_ a_a
57 A_7A a_7a
58 A_7a a_7a
59 A_77 a_77
60 A_7_ a_7
61 A__A a_a
62 A__a a_a
63 A__7 a_7
64 A___ a
65 aAAA a_aaa
66 aAAa a_a_aa
67 aAA7 a_aa7
68 aAA_ a_aa
69 aAaA a_aa_a
70 aAaa a_aaa
71 aAa7 a_aa7
72 aAa_ a_aa
73 aA7A a_a7a
74 aA7a a_a7a
75 aA77 a_a77
76 aA7_ a_a7
77 aA_A a_a_a
78 aA_a a_a_a
79 aA_7 a_a_7
80 aA__ a_a
81 aaAA aa_aa
82 aaAa aa_aa
83 aaA7 aa_a7
84 aaA_ aa_a
85 aaaA aaa_a
86 aaaa aaaa
87 aaa7 aaa7
88 aaa_ aaa
89 aa7A aa7_a
90 aa7a aa7a
91 aa77 aa77
92 aa7_ aa7
93 aa_A aa_a
94 aa_a aa_a
95 aa_7 aa_7
96 aa__ aa
97 a7AA a7_aa
98 a7Aa a7_aa
99 a7A7 a7_a7
100 a7A_ a7_a
101 a7aA a7a_a
102 a7aa a7aa
103 a7a7 a7a7
104 a7a_ a7a
105 a77A a77_a
106 a77a a77a
107 a777 a777
108 a77_ a77
109 a7_A a7_a
110 a7_a a7_a
111 a7_7 a7_7
112 a7__ a7
113 a_AA a_aa
114 a_Aa a_aa
115 a_A7 a_a7
116 a_A_ a_a
117 a_aA a_a_a
118 a_aa a_aa
119 a_a7 a_a7
120 a_a_ a_a
121 a_7A a_7a
122 a_7a a_7a
123 a_77 a_77
124 a_7_ a_7
125 a__A a_a
126 a__a a_a
127 a__7 a_7
128 a___ a
129 7AAA 7aaa
130 7AAa 7a_aa
131 7AA7 7aa7
132 7AA_ 7aa
133 7AaA 7aa_a
134 7Aaa 7aaa
135 7Aa7 7aa7
136 7Aa_ 7aa
137 7A7A 7a7a
138 7A7a 7a7a
139 7A77 7a77
140 7A7_ 7a7
141 7A_A 7a_a
142 7A_a 7a_a
143 7A_7 7a_7
144 7A__ 7a
145 7aAA 7a_aa
146 7aAa 7a_aa
147 7aA7 7a_a7
148 7aA_ 7a_a
149 7aaA 7aa_a
150 7aaa 7aaa
151 7aa7 7aa7
152 7aa_ 7aa
153 7a7A 7a7_a
154 7a7a 7a7a
155 7a77 7a77
156 7a7_ 7a7
157 7a_A 7a_a
158 7a_a 7a_a
159 7a_7 7a_7
160 7a__ 7a
161 77AA 77aa
162 77Aa 77aa
163 77A7 77a7
164 77A_ 77a
165 77aA 77a_a
166 77aa 77aa
167 77a7 77a7
168 77a_ 77a
169 777A 777a
170 777a 777a
171 7777 7777
172 777_ 777
173 77_A 77_a
174 77_a 77_a
175 77_7 77_7
176 77__ 77
177 7_AA 7_aa
178 7_Aa 7_aa
179 7_A7 7_a7
180 7_A_ 7_a
181 7_aA 7_a_a
182 7_aa 7_aa
183 7_a7 7_a7
184 7_a_ 7_a
185 7_7A 7_7a
186 7_7a 7_7a
187 7_77 7_77
188 7_7_ 7_7
189 7__A 7_a
190 7__a 7_a
191 7__7 7_7
192 7___ 7
193 _AAA aaa
194 _AAa a_aa
195 _AA7 aa7
196 _AA_ aa
197 _AaA aa_a
198 _Aaa aaa
199 _Aa7 aa7
200 _Aa_ aa
201 _A7A a7a
202 _A7a a7a
203 _A77 a77
204 _A7_ a7
205 _A_A a_a
206 _A_a a_a
207 _A_7 a_7
208 _A__ a
209 _aAA a_aa
210 _aAa a_aa
211 _aA7 a_a7
212 _aA_ a_a
213 _aaA aa_a
214 _aaa aaa
215 _aa7 aa7
216 _aa_ aa
217 _a7A a7_a
218 _a7a a7a
219 _a77 a77
220 _a7_ a7
221 _a_A a_a
222 _a_a a_a
223 _a_7 a_7
224 _a__ a
225 _7AA 7aa
226 _7Aa 7aa
227 _7A7 7a7
228 _7A_ 7a
229 _7aA 7a_a
230 _7aa 7aa
231 _7a7 7a7
232 _7a_ 7a
233 _77A 77a
234 _77a 77a
235 _777 777
236 _77_ 77
237 _7_A 7_a
238 _7_a 7_a
239 _7_7 7_7
240 _7__ 7
241 __AA aa
242 __Aa aa
243 __A7 a7
244 __A_ a
245 __aA a_a
246 __aa aa
247 __a7 a7
248 __a_ a
249 __7A 7a
250 __7a 7a
251 __77 77
252 __7_ 7
253 ___A a
254 ___a a
255 ___7 7
256 ____

Просмотреть файл

@ -0,0 +1,29 @@
import os
import csv
from pathlib import Path
from bigquery_etl.util.common import snake_case
def snake_case_test(case_name: str):
    """Check snake_case() against every row of one casing fixture file.

    Each CSV row must contain exactly two fields: the raw input string and
    the snake_cased string that snake_case() is expected to produce for it.

    :param case_name: file name of the fixture inside ``tests/resources/casing``
    :raises AssertionError: if a row is malformed or the conversion differs
    """
    # NOTE: the relative directory is resolved against the current working
    # directory, so the fixtures are only found when run from the repo root.
    test_file = Path("tests/resources/casing").resolve() / case_name
    # The csv module asks for newline="" on file objects so that it can do
    # its own newline handling; the encoding is pinned to avoid depending on
    # the platform default.
    with open(test_file, newline="", encoding="utf-8") as csv_file:
        for row in csv.reader(csv_file, delimiter=","):
            assert len(row) == 2, f"{case_name}: expected 2 fields, got {row!r}"
            raw, expected = row
            assert snake_case(raw) == expected, f"{case_name}: input {raw!r}"
def test_snake_casing():
    """Run the snake_case() fixture check over each casing CSV in turn."""
    fixtures = (
        # all strings of length 3 drawn from the alphabet "aA7"
        "alphanum_3.csv",
        # all strings of length 4 drawn from the alphabet "aA7_"
        "word_4.csv",
        # all column names from mozilla-pipeline-schemas affected by snake_casing
        # https://github.com/mozilla/jsonschema-transpiler/pull/79#issuecomment-509839572
        # https://gist.github.com/acmiyaguchi/3f526c440b67ebe469bcb6ab2da5123f#file-readme-md
        "mps-diff-integration.csv",
    )
    for fixture in fixtures:
        snake_case_test(fixture)

Просмотреть файл

@ -3,18 +3,16 @@
import itertools
import json
import re
import sys
import os.path
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent.resolve()))
from bigquery_etl.util.common import snake_case # noqa E402
# Maps a probe's details "kind" to a type name (presumably the BigQuery column
# type -- confirm). collect_probes looks kinds up with .get(kind, kind), so a
# kind without an entry here is passed through unchanged.
SCALAR_TYPES = {"uint": "INT64", "string": "STRING", "boolean": "BOOL"}
def convert_camel_case(name):
    """Return ``name`` converted from camelCase to lower-case snake_case.

    Two regex passes insert underscores, then the result is lower-cased:
    the first breaks before a capital that starts a lower-case run
    (``HTTPResponse`` -> ``HTTP_Response``), the second breaks between a
    lower-case letter or digit and the capital that follows it.
    """
    result = name
    # Order matters: the second pattern runs on the output of the first.
    for pattern in ("(.)([A-Z][a-z]+)", "([a-z0-9])([A-Z])"):
        result = re.sub(pattern, r"\1_\2", result)
    return result.lower()
def collect_probes(probes, schema_fields):
"""Collect scalars from probes and schema fields."""
scalars = {"main": [], "content": [], "gpu": []}
@ -39,7 +37,7 @@ def collect_probes(probes, schema_fields):
for p in record_in_processes:
collection[p].append(
(
convert_camel_case(probe["name"]).replace(".", "_"),
snake_case(probe["name"]).replace(".", "_"),
SCALAR_TYPES.get(
history[0]["details"]["kind"], history[0]["details"]["kind"]
),